@@ -22,13 +22,28 @@ import (
2222 "context"
2323 "fmt"
2424 "io"
25+ "net/http"
26+ "net/url"
2527 "os"
2628
29+ "github.com/fluxcd/pkg/tar"
30+ "github.com/google/go-containerregistry/pkg/authn"
2731 "github.com/google/go-containerregistry/pkg/crane"
2832 "github.com/google/go-containerregistry/pkg/name"
29- gcrv1 "github.com/google /go-containerregistry/pkg/v1 "
33+ "github.com/hashicorp /go-retryablehttp "
3034
31- "github.com/fluxcd/pkg/tar"
35+ v1 "github.com/google/go-containerregistry/pkg/v1"
36+ "github.com/google/go-containerregistry/pkg/v1/remote"
37+ "github.com/google/go-containerregistry/pkg/v1/remote/transport"
38+ "golang.org/x/sync/errgroup"
39+ )
40+
const (
	// thresholdForConcurrentPull is the layer size, in bytes, above which Pull
	// attempts a chunked, concurrent download instead of a single-stream
	// extraction.
	thresholdForConcurrentPull = 100 << 20 // 100MB

	// maxConcurrentPulls is the upper bound, and the default, for the number
	// of chunks downloaded in parallel.
	maxConcurrentPulls = 10
)
3348
3449var (
3954
4055// PullOptions contains options for pulling a layer.
4156type PullOptions struct {
42- layerIndex int
43- layerType LayerType
57+ layerIndex int
58+ layerType LayerType
59+ transport http.RoundTripper
60+ auth authn.Authenticator
61+ keychain authn.Keychain
62+ concurrency int
4463}
4564
4665// PullOption is a function for configuring PullOptions.
@@ -60,22 +79,53 @@ func WithPullLayerIndex(i int) PullOption {
6079 }
6180}
6281
82+ func WithTransport (t http.RoundTripper ) PullOption {
83+ return func (o * PullOptions ) {
84+ o .transport = t
85+ }
86+ }
87+
88+ func WithConcurrency (c int ) PullOption {
89+ return func (o * PullOptions ) {
90+ o .concurrency = c
91+ }
92+ }
93+
6394// Pull downloads an artifact from an OCI repository and extracts the content.
// It untars or copies the content to the given outPath depending on the layerType.
6596// If no layer type is given, it tries to determine the right type by checking compressed content of the layer.
66- func (c * Client ) Pull (ctx context.Context , url , outPath string , opts ... PullOption ) (* Metadata , error ) {
97+ func (c * Client ) Pull (ctx context.Context , urlString , outPath string , opts ... PullOption ) (* Metadata , error ) {
6798 o := & PullOptions {
6899 layerIndex : 0 ,
69100 }
101+ o .keychain = authn .DefaultKeychain
70102 for _ , opt := range opts {
71103 opt (o )
72104 }
73- ref , err := name .ParseReference (url )
105+
106+ if o .concurrency == 0 || o .concurrency > maxConcurrentPulls {
107+ o .concurrency = maxConcurrentPulls
108+ }
109+
110+ if o .transport == nil {
111+ transport := remote .DefaultTransport .(* http.Transport ).Clone ()
112+ o .transport = transport
113+ }
114+
115+ ref , err := name .ParseReference (urlString )
74116 if err != nil {
75117 return nil , fmt .Errorf ("invalid URL: %w" , err )
76118 }
77119
78- img , err := crane .Pull (url , c .optionsWithContext (ctx )... )
120+ if c .httpClient == nil {
121+ h , err := makeHttpClient (ctx , ref .Context (), * o )
122+ if err != nil {
123+ return nil , err
124+ }
125+ c .httpClient = h
126+ }
127+
128+ img , err := crane .Pull (urlString , c .optionsWithContext (ctx )... )
79129 if err != nil {
80130 return nil , err
81131 }
@@ -91,7 +141,7 @@ func (c *Client) Pull(ctx context.Context, url, outPath string, opts ...PullOpti
91141 }
92142
93143 meta := MetadataFromAnnotations (manifest .Annotations )
94- meta .URL = url
144+ meta .URL = urlString
95145 meta .Digest = ref .Context ().Digest (digest .String ()).String ()
96146
97147 layers , err := img .Layers ()
@@ -107,15 +157,133 @@ func (c *Client) Pull(ctx context.Context, url, outPath string, opts ...PullOpti
107157 return nil , fmt .Errorf ("index '%d' out of bound for '%d' layers in artifact" , o .layerIndex , len (layers ))
108158 }
109159
160+ size , err := layers [o .layerIndex ].Size ()
161+ if err != nil {
162+ return nil , fmt .Errorf ("failed to get layer size: %w" , err )
163+ }
164+
165+ if size > thresholdForConcurrentPull {
166+ digest , err := layers [o .layerIndex ].Digest ()
167+ if err != nil {
168+ return nil , fmt .Errorf ("parsing digest failed: %w" , err )
169+ }
170+ u := url.URL {
171+ Scheme : ref .Context ().Scheme (),
172+ Host : ref .Context ().RegistryStr (),
173+ Path : fmt .Sprintf ("/v2/%s/blobs/%s" , ref .Context ().RepositoryStr (), digest .String ()),
174+ }
175+ ok , err := c .IsRangeRequestEnabled (ctx , u )
176+ if err != nil {
177+ return nil , fmt .Errorf ("failed to check range request support: %w" , err )
178+ }
179+ if ok {
180+ err = c .concurrentExtractLayer (ctx , u , layers [o .layerIndex ], outPath , digest , size , o .concurrency )
181+ if err != nil {
182+ return nil , err
183+ }
184+ return meta , nil
185+ }
186+ }
187+
110188 err = extractLayer (layers [o .layerIndex ], outPath , o .layerType )
111189 if err != nil {
112190 return nil , err
113191 }
114192 return meta , nil
115193}
116194
195+ // TO DO: handle authentication handle using keychain for authentication
196+ func (c * Client ) IsRangeRequestEnabled (ctx context.Context , u url.URL ) (bool , error ) {
197+ req , err := retryablehttp .NewRequest (http .MethodHead , u .String (), nil )
198+ if err != nil {
199+ return false , err
200+ }
201+
202+ resp , err := c .httpClient .Do (req .WithContext (ctx ))
203+ if err != nil {
204+ return false , err
205+ }
206+
207+ if err := transport .CheckError (resp , http .StatusOK ); err != nil {
208+ return false , err
209+ }
210+
211+ if rangeUnit := resp .Header .Get ("Accept-Ranges" ); rangeUnit == "bytes" {
212+ return true , nil
213+ }
214+ for k , v := range resp .Header {
215+ fmt .Printf ("Header: %s, Value: %s\n " , k , v )
216+ }
217+ return false , nil
218+ }
219+
220+ func (c * Client ) concurrentExtractLayer (ctx context.Context , u url.URL , layer v1.Layer , path string , digest v1.Hash , size int64 , concurrency int ) error {
221+ chunkSize := size / int64 (concurrency )
222+ chunks := make ([][]byte , concurrency + 1 )
223+ diff := size % int64 (concurrency )
224+
225+ g , ctx := errgroup .WithContext (ctx )
226+ for i := 0 ; i < concurrency ; i ++ {
227+ i := i
228+ g .Go (func () (err error ) {
229+ start , end := int64 (i )* chunkSize , int64 (i + 1 )* chunkSize
230+ if i == concurrency - 1 {
231+ end += diff
232+ }
233+ req , err := retryablehttp .NewRequest (http .MethodGet , u .String (), nil )
234+ if err != nil {
235+ return fmt .Errorf ("failed to create a new request: %w" , err )
236+ }
237+ req .Header .Add ("Range" , fmt .Sprintf ("bytes=%d-%d" , start , end - 1 ))
238+ resp , err := c .httpClient .Do (req .WithContext (ctx ))
239+ if err != nil {
240+ return fmt .Errorf ("failed to download archive: %w" , err )
241+ }
242+ defer resp .Body .Close ()
243+
244+ if err := transport .CheckError (resp , http .StatusPartialContent ); err != nil {
245+ return fmt .Errorf ("failed to download archive from %s (status: %s)" , u .String (), resp .Status )
246+ }
247+
248+ c , err := io .ReadAll (io .LimitReader (resp .Body , end - start ))
249+ if err != nil {
250+ return fmt .Errorf ("failed to read response body: %w" , err )
251+ }
252+ chunks [i ] = c
253+ return nil
254+ })
255+ }
256+ err := g .Wait ()
257+ if err != nil {
258+ return err
259+ }
260+
261+ content := bufio .NewReader (bytes .NewReader (bytes .Join (chunks , nil )))
262+ d , s , err := v1 .SHA256 (content )
263+ if err != nil {
264+ return err
265+ }
266+ if d != digest {
267+ return fmt .Errorf ("digest mismatch: expected %s, got %s" , digest , d )
268+ }
269+ if s != size {
270+ return fmt .Errorf ("size mismatch: expected %d, got %d" , size , size )
271+ }
272+
273+ f , err := os .Create (path )
274+ if err != nil {
275+ return err
276+ }
277+
278+ _ , err = io .Copy (f , content )
279+ if err != nil {
280+ return fmt .Errorf ("error copying layer content: %s" , err )
281+ }
282+ return nil
283+ }
284+
117285// extractLayer extracts the Layer to the path
118- func extractLayer (layer gcrv1 .Layer , path string , layerType LayerType ) error {
286+ func extractLayer (layer v1 .Layer , path string , layerType LayerType ) error {
119287 var blob io.Reader
120288 blob , err := layer .Compressed ()
121289 if err != nil {
@@ -173,3 +341,40 @@ func isGzipBlob(buf *bufio.Reader) (bool, error) {
173341 }
174342 return bytes .Equal (b , gzipMagicHeader ), nil
175343}
344+
345+ type resource interface {
346+ Scheme () string
347+ RegistryStr () string
348+ Scope (string ) string
349+
350+ authn.Resource
351+ }
352+
353+ func makeHttpClient (ctx context.Context , target resource , o PullOptions ) (* retryablehttp.Client , error ) {
354+ auth := o .auth
355+ if o .keychain != nil {
356+ kauth , err := o .keychain .Resolve (target )
357+ if err != nil {
358+ return nil , err
359+ }
360+ auth = kauth
361+ }
362+
363+ reg , ok := target .(name.Registry )
364+ if ! ok {
365+ repo , ok := target .(name.Repository )
366+ if ! ok {
367+ return nil , fmt .Errorf ("unexpected resource: %T" , target )
368+ }
369+ reg = repo .Registry
370+ }
371+
372+ tr , err := transport .NewWithContext (ctx , reg , auth , o .transport , []string {target .Scope (transport .PullScope )})
373+ if err != nil {
374+ return nil , err
375+ }
376+
377+ h := retryablehttp .NewClient ()
378+ h .HTTPClient = & http.Client {Transport : tr }
379+ return h , nil
380+ }
0 commit comments