diff --git a/cmd/rwp/cmd/helpers/inference.go b/cmd/rwp/cmd/helpers/inference.go new file mode 100644 index 00000000..326757b0 --- /dev/null +++ b/cmd/rwp/cmd/helpers/inference.go @@ -0,0 +1,43 @@ +package helpers + +import ( + "errors" + + "github.com/readium/go-toolkit/pkg/streamer" +) + +type InferA11yMetadata streamer.InferA11yMetadata + +// String is used both by fmt.Print and by Cobra in help text +func (e *InferA11yMetadata) String() string { + if e == nil { + return "no" + } + switch *e { + case InferA11yMetadata(streamer.InferA11yMetadataMerged): + return "merged" + case InferA11yMetadata(streamer.InferA11yMetadataSplit): + return "split" + default: + return "no" + } +} + +func (e *InferA11yMetadata) Set(v string) error { + switch v { + case "no": + *e = InferA11yMetadata(streamer.InferA11yMetadataNo) + case "merged": + *e = InferA11yMetadata(streamer.InferA11yMetadataMerged) + case "split": + *e = InferA11yMetadata(streamer.InferA11yMetadataSplit) + default: + return errors.New(`must be one of "no", "merged", or "split"`) + } + return nil +} + +// Type is only used in help text. 
+func (e *InferA11yMetadata) Type() string { + return "string" +} diff --git a/cmd/rwp/cmd/helpers/inspector.go b/cmd/rwp/cmd/helpers/inspector.go new file mode 100644 index 00000000..4f82f9ef --- /dev/null +++ b/cmd/rwp/cmd/helpers/inspector.go @@ -0,0 +1,51 @@ +package helpers + +import ( + "io/fs" + + "github.com/pkg/errors" + "github.com/readium/go-toolkit/pkg/analyzer" + "github.com/readium/go-toolkit/pkg/manifest" +) + +type ImageInspector struct { + Filesystem fs.FS + Algorithms []manifest.HashAlgorithm + err error +} + +func (n *ImageInspector) Error() error { + return n.err +} + +// TransformHREF implements ManifestTransformer +func (n *ImageInspector) TransformHREF(href manifest.HREF) manifest.HREF { + // Identity + return href +} + +// TransformLink implements ManifestTransformer +func (n *ImageInspector) TransformLink(link manifest.Link) manifest.Link { + if n.err != nil || link.MediaType == nil || !link.MediaType.IsBitmap() { + return link + } + + newLink, err := analyzer.Image(n.Filesystem, link, n.Algorithms) + if err != nil { + n.err = errors.Wrap(err, "failed inspecting image "+link.Href.String()) + return link + } + return *newLink +} + +// TransformManifest implements ManifestTransformer +func (n *ImageInspector) TransformManifest(manifest manifest.Manifest) manifest.Manifest { + // Identity + return manifest +} + +// TransformMetadata implements ManifestTransformer +func (n *ImageInspector) TransformMetadata(metadata manifest.Metadata) manifest.Metadata { + // Identity + return metadata +} diff --git a/cmd/rwp/cmd/manifest.go b/cmd/rwp/cmd/manifest.go index 90b2207f..ef2fad26 100644 --- a/cmd/rwp/cmd/manifest.go +++ b/cmd/rwp/cmd/manifest.go @@ -1,13 +1,18 @@ package cmd import ( + "context" "encoding/json" - "errors" "fmt" "path/filepath" + "github.com/pkg/errors" + "github.com/readium/go-toolkit/cmd/rwp/cmd/helpers" "github.com/readium/go-toolkit/pkg/asset" + "github.com/readium/go-toolkit/pkg/fetcher" + 
"github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/streamer" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/spf13/cobra" ) @@ -15,11 +20,19 @@ import ( var indentFlag string // Infer accessibility metadata. -var inferA11yFlag InferA11yMetadata +var inferA11yFlag helpers.InferA11yMetadata // Infer the number of pages from the generated position list. var inferPageCountFlag bool +/*var inferIgnoreImageHashesFlag []string + +var inferIgnoreImageDirectoryFlag string*/ + +var hash []string + +var inspectImagesFlag bool + var manifestCmd = &cobra.Command{ Use: "manifest ", Short: "Generate a Readium Web Publication Manifest for a publication", @@ -53,17 +66,42 @@ Examples: // occurs. cmd.SilenceUsage = true - path := filepath.Clean(args[0]) + path, err := url.FromFilepath(filepath.Clean(args[0])) + if err != nil { + return fmt.Errorf("failed creating URL from filepath: %w", err) + } pub, err := streamer.New(streamer.Config{ InferA11yMetadata: streamer.InferA11yMetadata(inferA11yFlag), InferPageCount: inferPageCountFlag, }).Open( + context.TODO(), asset.File(path), "", ) if err != nil { return fmt.Errorf("failed opening %s: %w", path, err) } + if inspectImagesFlag { + hashAlgorithms := make([]manifest.HashAlgorithm, len(hash)) + for i, h := range hash { + hashAlgorithms[i] = manifest.HashAlgorithm(h) + } + inspector := &helpers.ImageInspector{ + Algorithms: hashAlgorithms, + Filesystem: fetcher.ToFS(context.TODO(), pub.Fetcher), + } + + // Inspect publication files and overwrite the links + pub.Manifest.ReadingOrder = pub.Manifest.ReadingOrder.Copy(inspector) + if inspector.Error() != nil { + return fmt.Errorf("failed inspecting images in reading order: %w", inspector.Error()) + } + pub.Manifest.Resources = pub.Manifest.Resources.Copy(inspector) + if inspector.Error() != nil { + return fmt.Errorf("failed inspecting images in resources: %w", inspector.Error()) + } + } + var jsonBytes []byte if indentFlag == "" { jsonBytes, err = 
json.Marshal(pub.Manifest) @@ -84,40 +122,8 @@ func init() { manifestCmd.Flags().StringVarP(&indentFlag, "indent", "i", "", "Indentation used to pretty-print") manifestCmd.Flags().Var(&inferA11yFlag, "infer-a11y", "Infer accessibility metadata: no, merged, split") manifestCmd.Flags().BoolVar(&inferPageCountFlag, "infer-page-count", false, "Infer the number of pages from the generated position list.") -} - -type InferA11yMetadata streamer.InferA11yMetadata - -// String is used both by fmt.Print and by Cobra in help text -func (e *InferA11yMetadata) String() string { - if e == nil { - return "no" - } - switch *e { - case InferA11yMetadata(streamer.InferA11yMetadataMerged): - return "merged" - case InferA11yMetadata(streamer.InferA11yMetadataSplit): - return "split" - default: - return "no" - } -} - -func (e *InferA11yMetadata) Set(v string) error { - switch v { - case "no": - *e = InferA11yMetadata(streamer.InferA11yMetadataNo) - case "merged": - *e = InferA11yMetadata(streamer.InferA11yMetadataMerged) - case "split": - *e = InferA11yMetadata(streamer.InferA11yMetadataSplit) - default: - return errors.New(`must be one of "no", "merged", or "split"`) - } - return nil -} - -// Type is only used in help text. -func (e *InferA11yMetadata) Type() string { - return "string" + manifestCmd.Flags().StringSliceVar(&hash, "hash", []string{string(manifest.HashAlgorithmSHA256), string(manifest.HashAlgorithmMD5)}, "Hashes to use when enhancing links, such as with image inspection. Note visual hashes are more computationally expensive. Acceptable values: sha256,md5,phash-dct,https://blurha.sh") + manifestCmd.Flags().BoolVar(&inspectImagesFlag, "inspect-images", false, "Inspect images in the manifest. Their links will be enhanced with size, width and height, and hashes") + // manifestCmd.Flags().StringSliceVar(&inferIgnoreImageHashesFlag, "infer-a11y-ignore-image-hashes", nil, "Ignore the given hashes when inferring textual accessibility. 
Hashes are in the format :, separated by commas.") + // manifestCmd.Flags().StringVar(&inferIgnoreImageDirectoryFlag, "infer-a11y-ignore-image-dir", "", "Ignore the images in a given directory when inferring textual accessibility.") } diff --git a/cmd/rwp/cmd/serve.go b/cmd/rwp/cmd/serve.go index 5e4bd192..d10be98e 100644 --- a/cmd/rwp/cmd/serve.go +++ b/cmd/rwp/cmd/serve.go @@ -1,8 +1,10 @@ package cmd import ( + "context" "errors" "fmt" + "log" "net/http" "os" "path/filepath" @@ -10,9 +12,16 @@ import ( "log/slog" + "cloud.google.com/go/storage" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/readium/go-toolkit/cmd/rwp/cmd/serve" + "github.com/readium/go-toolkit/cmd/rwp/cmd/serve/client" "github.com/readium/go-toolkit/pkg/streamer" "github.com/spf13/cobra" + "google.golang.org/api/option" ) var debugFlag bool @@ -21,6 +30,19 @@ var bindAddressFlag string var bindPortFlag uint16 +// Cloud-related flags +var s3EndpointFlag string +var s3RegionFlag string +var s3AccessKeyFlag string +var s3SecretKeyFlag string + +var httpAuthorizationFlag string + +var remoteArchiveTimeoutFlag uint32 +var remoteArchiveCacheSize uint32 +var remoteArchiveCacheCount uint32 +var remoteArchiveCacheAll uint32 + var serveCmd = &cobra.Command{ Use: "serve ", Short: "Start a local HTTP server, serving a specified directory of publications", @@ -74,12 +96,64 @@ to the internet except for testing/debugging purposes.`, slog.SetLogLoggerLevel(slog.LevelInfo) } + // Set up remote publication retrieval clients + remote := serve.Remote{} + + // S3 + options := []func(*config.LoadOptions) error{ + config.WithRegion(s3RegionFlag), + config.WithRequestChecksumCalculation(0), + config.WithResponseChecksumValidation(0), + // TODO: look into custom HTTP client, user-agent + } + if s3AccessKeyFlag != "" && s3SecretKeyFlag != "" { + options = append(options, 
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(s3AccessKeyFlag, s3SecretKeyFlag, ""))) + } + cfg, err := config.LoadDefaultConfig(context.Background(), options...) + if err != nil { + log.Fatal(err) + } + _, err = cfg.Credentials.Retrieve(context.Background()) + if err == nil { + remote.S3 = s3.NewFromConfig(cfg, func(o *s3.Options) { + if s3EndpointFlag != "" { + o.BaseEndpoint = aws.String(s3EndpointFlag) + } + }) + } else { + slog.Warn("S3 credentials retrieval failed, S3 support will be disabled", "error", err) + } + + // GCS + opts := []option.ClientOption{ + option.WithScopes(storage.ScopeReadOnly), + storage.WithJSONReads(), + // option.WithUserAgent(TODO), + // TODO: look into more efficient transport (HTTP client) + } + remote.GCS, err = storage.NewClient(context.Background(), opts...) + if err != nil { + slog.Warn("GCS client creation failed, GCS support will be disabled", "error", err) + } + + remote.HTTP, err = client.NewHTTPClient(httpAuthorizationFlag) + if err != nil { + slog.Warn("HTTP client creation failed, HTTP support will be disabled", "error", err) + } + + // Remote archive streaming tweaks + remote.Config.CacheCountThreshold = int64(remoteArchiveCacheCount) + remote.Config.CacheSizeThreshold = int64(remoteArchiveCacheSize) + remote.Config.Timeout = time.Duration(remoteArchiveTimeoutFlag) * time.Second + remote.Config.CacheAllThreshold = int64(remoteArchiveCacheAll) + + // Create server pubServer := serve.NewServer(serve.ServerConfig{ Debug: debugFlag, BaseDirectory: path, JSONIndent: indentFlag, InferA11yMetadata: streamer.InferA11yMetadata(inferA11yFlag), - }) + }, remote) bind := fmt.Sprintf("%s:%d", bindAddressFlag, bindPortFlag) httpServer := &http.Server{ @@ -109,4 +183,15 @@ func init() { serveCmd.Flags().Var(&inferA11yFlag, "infer-a11y", "Infer accessibility metadata: no, merged, split") serveCmd.Flags().BoolVarP(&debugFlag, "debug", "d", false, "Enable debug mode") + serveCmd.Flags().StringVar(&s3EndpointFlag, 
"s3-endpoint", "", "Custom S3 endpoint URL") + serveCmd.Flags().StringVar(&s3RegionFlag, "s3-region", "auto", "S3 region") + serveCmd.Flags().StringVar(&s3AccessKeyFlag, "s3-access-key", "", "S3 access key") + serveCmd.Flags().StringVar(&s3SecretKeyFlag, "s3-secret-key", "", "S3 secret key") + + serveCmd.Flags().StringVar(&httpAuthorizationFlag, "http-authorization", "", "HTTP authorization header value (e.g. 'Bearer ' or 'Basic ')") + + serveCmd.Flags().Uint32Var(&remoteArchiveTimeoutFlag, "remote-archive-timeout", 60, "Timeout for remote archive requests (in seconds)") + serveCmd.Flags().Uint32Var(&remoteArchiveCacheSize, "remote-archive-cache-size", 1024*1024, "Max size of items in an archive that can be cached (in bytes)") + serveCmd.Flags().Uint32Var(&remoteArchiveCacheCount, "remote-archive-cache-count", 64, "Max number of items in an archive that can be cached") + serveCmd.Flags().Uint32Var(&remoteArchiveCacheAll, "remote-archive-cache-all", 1024*1024, "Archives this size or less (in bytes) will be cached in full") } diff --git a/cmd/rwp/cmd/serve/api.go b/cmd/rwp/cmd/serve/api.go index dfcae9de..cd5cacab 100644 --- a/cmd/rwp/cmd/serve/api.go +++ b/cmd/rwp/cmd/serve/api.go @@ -2,6 +2,7 @@ package serve import ( "bytes" + "context" "encoding/base64" "encoding/json" "log/slog" @@ -52,29 +53,80 @@ func (s *Server) demoList(w http.ResponseWriter, req *http.Request) { enc.Encode(files) } -func (s *Server) getPublication(filename string) (*pub.Publication, error) { +func (s *Server) getPublication(ctx context.Context, filename string) (*pub.Publication, bool, error) { fpath, err := base64.RawURLEncoding.DecodeString(filename) if err != nil { - return nil, err + return nil, false, err } + loc, err := url.URLFromString(string(fpath)) + if err != nil { + return nil, false, errors.Wrap(err, "failed creating URL from filepath") + } + u := url.BaseFile.Resolve(loc).(url.AbsoluteURL) // Turn relative filepaths into file:/// URLs - cp := filepath.Clean(string(fpath)) - 
dat, ok := s.lfu.Get(cp) + dat, ok := s.lfu.Get(u.String()) if !ok { - pub, err := streamer.New(streamer.Config{ + var pub *pub.Publication + var remote bool + config := streamer.Config{ InferA11yMetadata: s.config.InferA11yMetadata, - }).Open(asset.File(filepath.Join(s.config.BaseDirectory, cp)), "") - if err != nil { - return nil, errors.Wrap(err, "failed opening "+cp) + HttpClient: s.remote.HTTP, + } + if u.IsFile() { + path, err := url.FromFilepath(filepath.Join(s.config.BaseDirectory, path.Clean(u.Path()))) + if err != nil { + return nil, remote, errors.Wrap(err, "failed creating URL from filepath") + } + + pub, err = streamer.New(config).Open(ctx, asset.File(path), "") + if err != nil { + return nil, remote, errors.Wrap(err, "failed opening "+path.String()) + } + } else { + switch u.Scheme() { + case url.SchemeS3: + remote = true + if s.remote.S3 == nil { + return nil, remote, errors.New("S3 client not configured") + } + config.ArchiveFactory = archive.NewS3ArchiveFactory(s.remote.S3, archive.NewDefaultRemoteArchiveConfig()) + pub, err = streamer.New(config).Open(ctx, asset.S3(s.remote.S3, u), "") + if err != nil { + return nil, remote, errors.Wrap(err, "failed opening "+u.String()) + } + case url.SchemeGS: + remote = true + if s.remote.GCS == nil { + return nil, remote, errors.New("GCS client not configured") + } + config.ArchiveFactory = archive.NewGCSArchiveFactory(s.remote.GCS, archive.NewDefaultRemoteArchiveConfig()) + pub, err = streamer.New(config).Open(ctx, asset.GCS(s.remote.GCS, u), "") + if err != nil { + return nil, remote, errors.Wrap(err, "failed opening "+u.String()) + } + case url.SchemeHTTP, url.SchemeHTTPS: + remote = true + if s.remote.HTTP == nil { + return nil, remote, errors.New("HTTP client not configured") + } + config.ArchiveFactory = archive.NewHTTPArchiveFactory(s.remote.HTTP, archive.NewDefaultRemoteArchiveConfig()) + pub, err = streamer.New(config).Open(ctx, asset.HTTP(s.remote.HTTP, u), "") + if err != nil { + return nil, remote, 
errors.Wrap(err, "failed opening "+u.String()) + } + default: + return nil, remote, errors.New("unsupported scheme " + u.Scheme().String()) + } } // Cache the publication - encPub := &cache.CachedPublication{Publication: pub} - s.lfu.Set(cp, encPub) + encPub := cache.EncapsulatePublication(pub, remote) + s.lfu.Set(u.String(), encPub) - return encPub.Publication, nil + return encPub.Publication, remote, nil } - return dat.(*cache.CachedPublication).Publication, nil + cp := dat.(*cache.CachedPublication) + return cp.Publication, cp.Remote, nil } func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { @@ -82,10 +134,13 @@ func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { filename := vars["path"] // Load the publication - publication, err := s.getPublication(filename) + publication, _, err := s.getPublication(req.Context(), filename) if err != nil { slog.Error("failed opening publication", "error", err) w.WriteHeader(500) + if s.config.Debug { + w.Write([]byte(err.Error())) + } return } @@ -103,6 +158,9 @@ func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { if err != nil { slog.Error("failed creating self URL", "error", err) w.WriteHeader(500) + if s.config.Debug { + w.Write([]byte(err.Error())) + } return } @@ -117,6 +175,9 @@ func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { if err != nil { slog.Error("failed marshalling manifest JSON", "error", err) w.WriteHeader(500) + if s.config.Debug { + w.Write([]byte(err.Error())) + } return } @@ -127,6 +188,9 @@ func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { if err != nil { slog.Error("failed writing manifest JSON to buffer", "error", err) w.WriteHeader(500) + if s.config.Debug { + w.Write([]byte(err.Error())) + } return } } else { @@ -134,6 +198,9 @@ func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { if err != nil { slog.Error("failed indenting manifest JSON", "error", err) w.WriteHeader(500) + 
if s.config.Debug { + w.Write([]byte(err.Error())) + } return } } @@ -158,6 +225,9 @@ func (s *Server) getManifest(w http.ResponseWriter, req *http.Request) { if err != nil { slog.Error("failed writing manifest JSON to response writer", "error", err) w.WriteHeader(500) + if s.config.Debug { + w.Write([]byte(err.Error())) + } return } } @@ -167,10 +237,13 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) { filename := vars["path"] // Load the publication - publication, err := s.getPublication(filename) + publication, remote, err := s.getPublication(r.Context(), filename) if err != nil { slog.Error("failed opening publication", "error", err) w.WriteHeader(500) + if s.config.Debug { + w.Write([]byte(err.Error())) + } return } @@ -179,6 +252,9 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) { if err != nil { slog.Error("failed parsing asset path as URL", "error", err) w.WriteHeader(400) + if s.config.Debug { + w.Write([]byte(err.Error())) + } return } rawHref := href.Raw() @@ -199,11 +275,11 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) { } // Get the asset from the publication - res := publication.Get(finalLink) + res := publication.Get(r.Context(), finalLink) defer res.Close() // Get asset length in bytes - l, rerr := res.Length() + l, rerr := res.Length(r.Context()) if rerr != nil { w.WriteHeader(rerr.HTTPStatus()) w.Write([]byte(rerr.Error())) @@ -250,27 +326,69 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) { } cres, ok := res.(fetcher.CompressedResource) + normalResponse := func() { + if remote { + var bin []byte + bin, rerr = res.Read(r.Context(), start, end) + if rerr == nil { + _, err = w.Write(bin) + if err != nil { + rerr = fetcher.Other(err) + } + } + } else { + _, rerr = res.Stream(r.Context(), w, start, end) + } + } if ok && cres.CompressedAs(archive.CompressionMethodDeflate) && start == 0 && end == 0 { // Stream the asset in compressed format if supported by the user 
agent if supportsEncoding(r, "deflate") { - w.Header().Set("content-encoding", "deflate") - w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength(), 10)) - _, err = cres.StreamCompressed(w) + headers := func() { + w.Header().Set("content-encoding", "deflate") + w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength(r.Context()), 10)) + } + if remote { + var bin []byte + bin, rerr = cres.ReadCompressed(r.Context()) + if rerr == nil { + headers() + _, err = w.Write(bin) + if err != nil { + rerr = fetcher.Other(err) + } + } + } else { + headers() + _, rerr = cres.StreamCompressed(r.Context(), w) + } } else if supportsEncoding(r, "gzip") && l <= archive.GzipMaxLength { - w.Header().Set("content-encoding", "gzip") - w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength()+archive.GzipWrapperLength, 10)) - _, err = cres.StreamCompressedGzip(w) + headers := func() { + w.Header().Set("content-encoding", "gzip") + w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength(r.Context())+archive.GzipWrapperLength, 10)) + } + if remote { + var bin []byte + bin, rerr = cres.ReadCompressedGzip(r.Context()) + if rerr == nil { + headers() + _, err = w.Write(bin) + if err != nil { + rerr = fetcher.Other(err) + } + } + } else { + headers() + _, rerr = cres.StreamCompressedGzip(r.Context(), w) + } } else { - // Fall back to normal streaming - _, rerr = res.Stream(w, start, end) + normalResponse() } } else { - // Stream the asset - _, rerr = res.Stream(w, start, end) + normalResponse() } if rerr != nil { - if errors.Is(err, syscall.EPIPE) || errors.Is(err, syscall.ECONNRESET) { + if errors.Is(rerr.Cause, syscall.EPIPE) || errors.Is(rerr.Cause, syscall.ECONNRESET) { // Ignore client errors return } diff --git a/cmd/rwp/cmd/serve/cache/pubcache.go b/cmd/rwp/cmd/serve/cache/pubcache.go index 52a63f5a..2c038739 100644 --- a/cmd/rwp/cmd/serve/cache/pubcache.go +++ b/cmd/rwp/cmd/serve/cache/pubcache.go @@ -7,11 +7,11 @@ 
import ( // CachedPublication implements Evictable type CachedPublication struct { *pub.Publication + Remote bool } -func EncapsulatePublication(pub *pub.Publication) *CachedPublication { - cp := &CachedPublication{pub} - return cp +func EncapsulatePublication(pub *pub.Publication, remote bool) *CachedPublication { + return &CachedPublication{pub, remote} } func (cp *CachedPublication) OnEvict() { diff --git a/cmd/rwp/cmd/serve/client/http_auth.go b/cmd/rwp/cmd/serve/client/http_auth.go new file mode 100644 index 00000000..9ea8a422 --- /dev/null +++ b/cmd/rwp/cmd/serve/client/http_auth.go @@ -0,0 +1,33 @@ +package client + +import ( + "net/http" +) + +type authTransport struct { + Authorization string + Transport http.RoundTripper +} + +func (a *authTransport) RoundTrip(req *http.Request) (*http.Response, error) { + if a.Authorization == "" { + return a.transport().RoundTrip(req) + } + req2 := req.Clone(req.Context()) + req2.Header.Set("Authorization", a.Authorization) + return a.transport().RoundTrip(req2) +} + +func (a *authTransport) transport() http.RoundTripper { + if a.Transport != nil { + return a.Transport + } + return http.DefaultTransport +} + +func newAuthenticatedRoundTripper(auth string, transport *http.Transport) http.RoundTripper { + return &authTransport{ + Authorization: auth, + Transport: transport, + } +} diff --git a/cmd/rwp/cmd/serve/client/http_client.go b/cmd/rwp/cmd/serve/client/http_client.go new file mode 100644 index 00000000..7778a5b0 --- /dev/null +++ b/cmd/rwp/cmd/serve/client/http_client.go @@ -0,0 +1,67 @@ +package client + +import ( + "fmt" + "net" + "net/http" + "runtime" + "syscall" + "time" +) + +// Code below mostly from https://www.agwa.name/blog/post/preventing_server_side_request_forgery_in_golang + +func safeSocketControl(network string, address string, conn syscall.RawConn) error { + if !(network == "tcp4" || network == "tcp6") { + return fmt.Errorf("%s is not a safe network type", network) + } + + host, port, err := 
net.SplitHostPort(address) + if err != nil { + return fmt.Errorf("%s is not a valid host/port pair: %s", address, err) + } + + ipaddress := net.ParseIP(host) + if ipaddress == nil { + return fmt.Errorf("%s is not a valid IP address", host) + } + + if !isPublicIPAddress(ipaddress) { + return fmt.Errorf("%s is not a public IP address", ipaddress) + } + + if !(port == "80" || port == "443") { + return fmt.Errorf("%s is not a safe port number", port) + } + + return nil +} + +// Some of the below conf values from https://github.com/imgproxy/imgproxy/blob/master/transport/transport.go + +const ClientKeepAliveTimeout = 90 // Imgproxy default +var Workers = runtime.GOMAXPROCS(0) * 2 // Imgproxy default + +func NewHTTPClient(auth string) (*http.Client, error) { + safeDialer := &net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + DualStack: true, + Control: safeSocketControl, + } + + safeTransport := &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: safeDialer.DialContext, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + MaxIdleConnsPerHost: Workers + 1, + IdleConnTimeout: time.Duration(ClientKeepAliveTimeout) * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + } + + return &http.Client{ + Transport: newAuthenticatedRoundTripper(auth, safeTransport), + }, nil +} diff --git a/cmd/rwp/cmd/serve/client/ipaddress.go b/cmd/rwp/cmd/serve/client/ipaddress.go new file mode 100644 index 00000000..6dd40fb0 --- /dev/null +++ b/cmd/rwp/cmd/serve/client/ipaddress.go @@ -0,0 +1,62 @@ +/* + * Written in 2019 by Andrew Ayer + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to the + * public domain worldwide. This software is distributed without any + * warranty. + * + * You should have received a copy of the CC0 Public + * Domain Dedication along with this software. If not, see + * . 
+ */ +package client + +import ( + "net" +) + +func ipv4Net(a, b, c, d byte, subnetPrefixLen int) net.IPNet { + return net.IPNet{net.IPv4(a, b, c, d), net.CIDRMask(96+subnetPrefixLen, 128)} +} + +var reservedIPv4Nets = []net.IPNet{ + ipv4Net(0, 0, 0, 0, 8), // Current network + ipv4Net(10, 0, 0, 0, 8), // Private + ipv4Net(100, 64, 0, 0, 10), // RFC6598 + ipv4Net(127, 0, 0, 0, 8), // Loopback + ipv4Net(169, 254, 0, 0, 16), // Link-local + ipv4Net(172, 16, 0, 0, 12), // Private + ipv4Net(192, 0, 0, 0, 24), // RFC6890 + ipv4Net(192, 0, 2, 0, 24), // Test, doc, examples + ipv4Net(192, 88, 99, 0, 24), // IPv6 to IPv4 relay + ipv4Net(192, 168, 0, 0, 16), // Private + ipv4Net(198, 18, 0, 0, 15), // Benchmarking tests + ipv4Net(198, 51, 100, 0, 24), // Test, doc, examples + ipv4Net(203, 0, 113, 0, 24), // Test, doc, examples + ipv4Net(224, 0, 0, 0, 4), // Multicast + ipv4Net(240, 0, 0, 0, 4), // Reserved (includes broadcast / 255.255.255.255) +} + +var globalUnicastIPv6Net = net.IPNet{net.IP{0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, net.CIDRMask(3, 128)} + +func isIPv6GlobalUnicast(address net.IP) bool { + return globalUnicastIPv6Net.Contains(address) +} + +func isIPv4Reserved(address net.IP) bool { + for _, reservedNet := range reservedIPv4Nets { + if reservedNet.Contains(address) { + return true + } + } + return false +} + +func isPublicIPAddress(address net.IP) bool { + if address.To4() != nil { + return !isIPv4Reserved(address) + } else { + return isIPv6GlobalUnicast(address) + } +} diff --git a/cmd/rwp/cmd/serve/server.go b/cmd/rwp/cmd/serve/server.go index 20f1b5df..00527e0b 100644 --- a/cmd/rwp/cmd/serve/server.go +++ b/cmd/rwp/cmd/serve/server.go @@ -1,13 +1,24 @@ package serve import ( + "net/http" "time" + "cloud.google.com/go/storage" + "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/gorilla/mux" "github.com/readium/go-toolkit/cmd/rwp/cmd/serve/cache" + "github.com/readium/go-toolkit/pkg/archive" "github.com/readium/go-toolkit/pkg/streamer" ) 
+type Remote struct { + S3 *s3.Client // AWS S3-compatible storage + GCS *storage.Client // Google Cloud Storage + HTTP *http.Client // HTTP-requested storage + Config archive.RemoteArchiveConfig +} + type ServerConfig struct { Debug bool BaseDirectory string @@ -17,6 +28,7 @@ type ServerConfig struct { type Server struct { config ServerConfig + remote Remote router *mux.Router lfu *cache.TinyLFU } @@ -24,9 +36,10 @@ type Server struct { const MaxCachedPublicationAmount = 10 const MaxCachedPublicationTTL = time.Second * time.Duration(600) -func NewServer(config ServerConfig) *Server { +func NewServer(config ServerConfig, remote Remote) *Server { return &Server{ config: config, + remote: remote, lfu: cache.NewTinyLFU(MaxCachedPublicationAmount, MaxCachedPublicationTTL), } } diff --git a/go.mod b/go.mod index 3906b8cd..502ae3eb 100644 --- a/go.mod +++ b/go.mod @@ -1,47 +1,109 @@ module github.com/readium/go-toolkit -go 1.22.0 - -toolchain go1.23.5 +go 1.24.0 require ( + cloud.google.com/go/storage v1.51.0 github.com/CAFxX/httpcompression v0.0.9 github.com/agext/regexp v1.3.0 github.com/andybalholm/cascadia v1.3.3 + github.com/aws/aws-sdk-go-v2 v1.36.3 + github.com/aws/aws-sdk-go-v2/config v1.29.14 + github.com/aws/aws-sdk-go-v2/credentials v1.17.67 + github.com/aws/aws-sdk-go-v2/service/s3 v1.79.2 + github.com/aws/smithy-go v1.22.3 + github.com/azr/phash v0.2.0 + github.com/bbrks/go-blurhash v1.1.1 github.com/deckarep/golang-set v1.8.0 + github.com/disintegration/imaging v1.6.2 github.com/go-viper/mapstructure/v2 v2.2.1 github.com/gorilla/mux v1.8.1 github.com/gotd/contrib v0.21.0 + github.com/kettek/apng v0.0.0-20220823221153-ff692776a607 github.com/pdfcpu/pdfcpu v0.9.1 github.com/pkg/errors v0.9.1 github.com/readium/xmlquery v0.0.0-20230106230237-8f493145aef4 github.com/relvacode/iso8601 v1.6.0 - github.com/spf13/cobra v1.8.1 + github.com/spf13/cobra v1.9.1 github.com/stretchr/testify v1.10.0 github.com/trimmer-io/go-xmp v1.0.0 github.com/vmihailenco/go-tinylfu 
v0.2.2 github.com/zeebo/xxh3 v1.0.2 - golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c - golang.org/x/net v0.34.0 - golang.org/x/text v0.21.0 + go4.org v0.0.0-20230225012048-214862532bf5 + golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 + golang.org/x/image v0.26.0 + golang.org/x/net v0.39.0 + golang.org/x/text v0.24.0 + google.golang.org/api v0.229.0 ) require ( + cel.dev/expr v0.23.1 // indirect + cloud.google.com/go v0.120.1 // indirect + cloud.google.com/go/auth v0.16.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect + cloud.google.com/go/compute/metadata v0.6.0 // indirect + cloud.google.com/go/iam v1.5.2 // indirect + cloud.google.com/go/monitoring v1.24.2 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 // indirect github.com/andybalholm/brotli v1.1.1 // indirect - github.com/antchfx/xpath v1.3.3 // indirect + github.com/antchfx/xpath v1.3.4 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 // indirect + 
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect + github.com/azr/gift v1.1.2 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect + github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect + github.com/google/s2a-go v0.1.9 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect + github.com/googleapis/gax-go/v2 v2.14.1 // indirect github.com/hhrutter/lzw v1.0.0 // indirect - github.com/hhrutter/tiff v1.0.1 // indirect + github.com/hhrutter/tiff v1.0.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/klauspost/compress v1.17.11 // indirect - github.com/klauspost/cpuid/v2 v2.2.9 // indirect + github.com/klauspost/compress v1.18.0 // indirect + github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/spf13/pflag v1.0.6 // indirect - golang.org/x/image v0.23.0 // indirect - golang.org/x/sys v0.29.0 // indirect - gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.35.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect + go.opentelemetry.io/otel v1.35.0 // indirect + go.opentelemetry.io/otel/metric 
v1.35.0 // indirect + go.opentelemetry.io/otel/sdk v1.35.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect + go.opentelemetry.io/otel/trace v1.35.0 // indirect + golang.org/x/crypto v0.37.0 // indirect + golang.org/x/oauth2 v0.29.0 // indirect + golang.org/x/sync v0.13.0 // indirect + golang.org/x/sys v0.32.0 // indirect + golang.org/x/time v0.11.0 // indirect + google.golang.org/genproto v0.0.0-20250414145226-207652e42e2e // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250414145226-207652e42e2e // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e // indirect + google.golang.org/grpc v1.71.1 // indirect + google.golang.org/protobuf v1.36.6 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 35822bd4..77d7f30f 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,54 @@ +cel.dev/expr v0.23.1 h1:K4KOtPCJQjVggkARsjG9RWXP6O4R73aHeJMa/dmCQQg= +cel.dev/expr v0.23.1/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= +cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= +cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= +cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= +cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= +cloud.google.com/go v0.120.1 h1:Z+5V7yd383+9617XDCyszmK5E4wJRJL+tquMfDj9hLM= +cloud.google.com/go v0.120.1/go.mod h1:56Vs7sf/i2jYM6ZL9NYlC82r04PThNcPS5YgFmb0rp8= 
+cloud.google.com/go/auth v0.16.0 h1:Pd8P1s9WkcrBE2n/PhAwKsdrR35V3Sg2II9B+ndM3CU= +cloud.google.com/go/auth v0.16.0/go.mod h1:1howDHJ5IETh/LwYs3ZxvlkXF48aSqqJUM+5o02dNOI= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= +cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= +cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= +cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I= +cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg= +cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= +cloud.google.com/go/iam v1.5.2 h1:qgFRAGEmd8z6dJ/qyEchAuL9jpswyODjA2lS+w234g8= +cloud.google.com/go/iam v1.5.2/go.mod h1:SE1vg0N81zQqLzQEwxL2WI6yhetBdbNQuTvIKCSkUHE= +cloud.google.com/go/logging v1.13.0 h1:7j0HgAp0B94o1YRDqiqm26w4q1rDMH7XNRU34lJXHYc= +cloud.google.com/go/logging v1.13.0/go.mod h1:36CoKh6KA/M0PbhPKMq6/qety2DCAErbhXT62TuXALA= +cloud.google.com/go/longrunning v0.6.6 h1:XJNDo5MUfMM05xK3ewpbSdmt7R2Zw+aQEMbdQR65Rbw= +cloud.google.com/go/longrunning v0.6.6/go.mod h1:hyeGJUrPHcx0u2Uu1UFSoYZLn4lkMrccJig0t4FI7yw= +cloud.google.com/go/monitoring v1.24.2 h1:5OTsoJ1dXYIiMiuL+sYscLc9BumrL3CarVLL7dd7lHM= +cloud.google.com/go/monitoring v1.24.2/go.mod h1:x7yzPWcgDRnPEv3sI+jJGBkwl5qINf+6qY4eq0I9B4U= +cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= +cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= +cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= +cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= +cloud.google.com/go/storage v1.51.0 h1:ZVZ11zCiD7b3k+cH5lQs/qcNaoSz3U9I0jgwVzqDlCw= 
+cloud.google.com/go/storage v1.51.0/go.mod h1:YEJfu/Ki3i5oHC/7jyTgsGZwdQ8P9hqMqvpi5kRKGgc= +cloud.google.com/go/trace v1.11.5 h1:CALS1loyxJMnRiCwZSpdf8ac7iCsjreMxFD2WGxzzHU= +cloud.google.com/go/trace v1.11.5/go.mod h1:TwblCcqNInriu5/qzaeYEIH7wzUcchSdeY2l5wL3Eec= +dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/CAFxX/httpcompression v0.0.9 h1:0ue2X8dOLEpxTm8tt+OdHcgA+gbDge0OqFQWGKSqgrg= github.com/CAFxX/httpcompression v0.0.9/go.mod h1:XX8oPZA+4IDcfZ0A71Hz0mZsv/YJOgYygkFhizVPilM= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 h1:ErKg/3iS1AKcTkf3yixlZ54f9U1rljCkQyEXWUnIUxc= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0/go.mod h1:yAZHSGnqScoU556rBOVkwLze6WP5N+U11RHuWaGVxwY= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 h1:fYE9p3esPxA/C0rQ0AHhP0drtPXDRhaWiwg1DPqO7IU= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0/go.mod h1:BnBReJLvVYx2CS/UHOgVz2BXKXD9wsQPxZug20nZhd0= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.51.0 h1:OqVGm6Ei3x5+yZmSJG1Mh2NwHvpVmZ08CB5qJhT9Nuk= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.51.0/go.mod h1:SZiPHWGOOk3bl8tkevxkoiwPgsIl6CwrWcbwjfHZpdM= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 h1:6/0iUd0xrnX7qt+mLNRwg5c0PGv8wpE8K90ryANQwMI= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0/go.mod h1:otE2jQekW/PqXk1Awf5lmfokJx4uwuqcj1ab5SpGeW0= github.com/agext/regexp v1.3.0 h1:6+9tp+S41TU48gFNV47bX+pp1q7WahGofw6JccmsCDs= github.com/agext/regexp v1.3.0/go.mod 
h1:6phv1gViOJXWcTfpxOi9VMS+MaSAo+SUDf7do3ur1HA= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= @@ -8,68 +57,196 @@ github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOL github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= github.com/antchfx/xpath v1.2.1/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= -github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs= -github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/antchfx/xpath v1.3.4 h1:1ixrW1VnXd4HurCj7qnqnR0jo14g8JMe20Fshg1Vgz4= +github.com/antchfx/xpath v1.3.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= +github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 h1:zAybnyUQXIZ5mok5Jqwlf58/TFE7uvd3IAsa1aF9cXs= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10/go.mod h1:qqvMj6gHLR/EXWZw4ZbqlPbQUyenf4h82UQUlKc+l14= +github.com/aws/aws-sdk-go-v2/config v1.29.14 h1:f+eEi/2cKCg9pqKBoAIwRGzVb70MRKqWX4dg1BDcSJM= +github.com/aws/aws-sdk-go-v2/config v1.29.14/go.mod h1:wVPHWcIFv3WO89w0rE10gzf17ZYy+UVS1Geq8Iei34g= +github.com/aws/aws-sdk-go-v2/credentials v1.17.67 h1:9KxtdcIA/5xPNQyZRgUSpYOE6j9Bc4+D7nZua0KGYOM= +github.com/aws/aws-sdk-go-v2/credentials v1.17.67/go.mod h1:p3C44m+cfnbv763s52gCqrjaqyPikj9Sg47kUVaNZQQ= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mlnXuFrO4cOd3HLBroh1paFw= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q= 
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 h1:ZNTqv4nIdE/DiBfUUfXcLZ/Spcuz+RjeziUtNJackkM= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34/go.mod h1:zf7Vcd1ViW7cPqYWEHLHJkS50X0JS2IKz9Cgaj6ugrs= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.0 h1:lguz0bmOoGzozP9XfRJR1QIayEYo+2vP/No3OfLF0pU= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.0/go.mod h1:iu6FSzgt+M2/x3Dk8zhycdIcHjEFb36IS8HVUVFoMg0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 h1:moLQUoVq91LiqT1nbvzDukyqAlCv89ZmwaHw/ZFlFZg= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15/go.mod h1:ZH34PJUc8ApjBIfgQCFvkWcUDBtl/WTD+uiYHjd8igA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.79.2 h1:tWUG+4wZqdMl/znThEk9tcCy8tTMxq8dW0JTgamohrY= +github.com/aws/aws-sdk-go-v2/service/s3 v1.79.2/go.mod h1:U5SNqwhXB3Xe6F47kXvWihPl/ilGaEDe8HD/50Z9wxc= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 h1:1Gw+9ajCV1jogloEv1RRnvfRFia2cL6c9cuKV2Ps+G8= 
+github.com/aws/aws-sdk-go-v2/service/sso v1.25.3/go.mod h1:qs4a9T5EMLl/Cajiw2TcbNt2UNo/Hqlyp+GiuG4CFDI= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 h1:hXmVKytPfTy5axZ+fYbR5d0cFmC3JvwLm5kM83luako= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1/go.mod h1:MlYRNmYu/fGPoxBQVvBYr9nyr948aY/WLUvwBMBJubs= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 h1:1XuUZ8mYJw9B6lzAkXhqHlJd/XvaX32evhproijJEZY= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.19/go.mod h1:cQnB8CUnxbMU82JvlqjKR2HBOm3fe9pWorWBza6MBJ4= +github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k= +github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/azr/gift v1.1.2 h1:EbQ8/1QMtDfz5Beqg+RY5F21KbwGhE8aWSEbF1pp95A= +github.com/azr/gift v1.1.2/go.mod h1:bDKvjyxgachY3zdk831G99y+VANype25eu37uhm3khI= +github.com/azr/phash v0.2.0 h1:F6qkeYlwuMUMkUAJkQFElGrQzFnneJwV+L23VrEQ0cU= +github.com/azr/phash v0.2.0/go.mod h1:vUennaUN3i09UA33YxHpCR5l2CeENoCRB2Jo6pvWNf4= +github.com/bbrks/go-blurhash v1.1.1 h1:uoXOxRPDca9zHYabUTwvS4KnY++KKUbwFo+Yxb8ME4M= +github.com/bbrks/go-blurhash v1.1.1/go.mod h1:lkAsdyXp+EhARcUo85yS2G1o+Sh43I2ebF5togC4bAY= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= 
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f h1:C5bqEmzEPLsHm9Mv73lSE9e9bKV23aB1vxOsmZrkl3k= +github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/deckarep/golang-set v1.8.0 h1:sk9/l/KqpunDwP7pSjUg0keiOOLEnOBHzykLrsPppp4= github.com/deckarep/golang-set v1.8.0/go.mod h1:5nI87KwE7wgsBU1F4GKAw2Qod7p5kyS383rP6+o6qqo= +github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c= +github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.13.4 h1:zEqyPVyku6IvWCFwux4x9RxkLOMUL+1vC9xUFv5l2/M= +github.com/envoyproxy/go-control-plane v0.13.4/go.mod h1:kDfuBlDVsSj2MjrLEtRWtHlsWIFcGyB2RMO44Dc5GZA= +github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= +github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= +github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= +github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= 
+github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod 
h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/brotli/go/cbrotli v0.0.0-20230829110029-ed738e842d2f h1:jopqB+UTSdJGEJT8tEqYyE29zN91fi2827oLET8tl7k= github.com/google/brotli/go/cbrotli v0.0.0-20230829110029-ed738e842d2f/go.mod h1:nOPhAkwVliJdNTkj3gXpljmWhjc4wCaVqbMJcPKWP4s= +github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= +github.com/google/martian v2.1.0+incompatible/go.mod 
h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= +github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.6 h1:GW/XbdyBFQ8Qe+YAmFU9uHLo7OnF5tL52HFAgMmyrf4= +github.com/googleapis/enterprise-certificate-proxy v0.3.6/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q= +github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gotd/contrib v0.21.0 h1:4Fj05jnyBE84toXZl7mVTvt7f732n5uglvztyG6nTr4= github.com/gotd/contrib v0.21.0/go.mod h1:ENoUh75IhHGxfz/puVJg8BU4ZF89yrL6Q47TyoNqFYo= +github.com/hashicorp/golang-lru v0.5.0/go.mod 
h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hhrutter/lzw v1.0.0 h1:laL89Llp86W3rRs83LvKbwYRx6INE8gDn0XNb1oXtm0= github.com/hhrutter/lzw v1.0.0/go.mod h1:2HC6DJSn/n6iAZfgM3Pg+cP1KxeWc3ezG8bBqW5+WEo= -github.com/hhrutter/tiff v1.0.1 h1:MIus8caHU5U6823gx7C6jrfoEvfSTGtEFRiM8/LOzC0= -github.com/hhrutter/tiff v1.0.1/go.mod h1:zU/dNgDm0cMIa8y8YwcYBeuEEveI4B0owqHyiPpJPHc= +github.com/hhrutter/tiff v1.0.2 h1:7H3FQQpKu/i5WaSChoD1nnJbGx4MxU5TlNqqpxw55z8= +github.com/hhrutter/tiff v1.0.2/go.mod h1:pcOeuK5loFUE7Y/WnzGw20YxUdnqjY1P0Jlcieb/cCw= +github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/kettek/apng v0.0.0-20220823221153-ff692776a607 h1:8tP9cdXzcGX2AvweVVG/lxbI7BSjWbNNUustwJ9dQVA= +github.com/kettek/apng v0.0.0-20220823221153-ff692776a607/go.mod h1:x78/VRQYKuCftMWS0uK5e+F5RJ7S4gSlESRWI0Prl6Q= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= -github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= -github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= -github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= -github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= +github.com/klauspost/compress v1.18.0 
h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= +github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/matryer/is v1.2.0 h1:92UTHpy8CDwaJ08GqLDzhhuixiBUUD1p3AU6PHddz4A= +github.com/matryer/is v1.2.0/go.mod h1:2fLPjFQM9rhQ15aVEtbuwhJinnOqrmgXPNdZsdwlWXA= github.com/pdfcpu/pdfcpu v0.9.1 h1:q8/KlBdHjkE7ZJU4ofhKG5Rjf7M6L324CVM6BMDySao= github.com/pdfcpu/pdfcpu v0.9.1/go.mod h1:fVfOloBzs2+W2VJCCbq60XIxc3yJHAZ0Gahv1oO0gyI= github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib 
v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/readium/xmlquery v0.0.0-20230106230237-8f493145aef4 h1:iEQhT4jOppg7EK/r4/1e4ULIeCsugv35O+sDlvce5Bo= github.com/readium/xmlquery v0.0.0-20230106230237-8f493145aef4/go.mod h1:S7gZ8KUgPbsdlF9/iomcwnU31iHMyFEO66+JFJE8uz8= github.com/relvacode/iso8601 v1.6.0 h1:eFXUhMJN3Gz8Rcq82f9DTMW0svjtAVuIEULglM7QHTU= github.com/relvacode/iso8601 v1.6.0/go.mod h1:FlNp+jz+TXpyRqgmM7tnzHHzBnz776kmAH2h3sZCn0I= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd h1:CmH9+J6ZSsIjUK3dcGsnCnO41eRBOnY12zwkn5qVwgc= +github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx 
v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= @@ -90,41 +267,136 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= +go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/contrib/detectors/gcp v1.35.0 h1:bGvFt68+KTiAKFlacHW6AhA56GF2rS0bdD3aJYEnmzA= +go.opentelemetry.io/contrib/detectors/gcp v1.35.0/go.mod h1:qGWP8/+ILwMRIUf9uIVLloR1uo5ZYAslM4O6OqUi1DA= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 h1:x7wzEgXfnzJcHDwStJT+mxOz4etr2EcexjqhBvmoakw= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0/go.mod h1:rg+RlpR5dKwaS95IyyZqj5Wd4E13lk/msnTS0Xl9lJM= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 
h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= +go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ= +go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.29.0 h1:WDdP9acbMYjbKIyJUhTvtzj601sVJOqgWdUxSdR/Ysc= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.29.0/go.mod h1:BLbf7zbNIONBLPwvFnwNHGj4zge8uTCM/UPIVW1Mq2I= +go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= +go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= +go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= +go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= +go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= +go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= +go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs= +go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= +go4.org v0.0.0-20230225012048-214862532bf5 h1:nifaUDeh+rPaBCMPMQHZmvJf+QdpLFnuQPwx+LxVmtc= +go4.org v0.0.0-20230225012048-214862532bf5/go.mod h1:F57wTi5Lrj6WLyswp5EYV1ncrEbFGHD4hhz6S1ZYeaU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto 
v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= -golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc= -golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= -golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68= -golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY= +golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= +golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= +golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= +golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= +golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= +golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM= +golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod 
h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.26.0 h1:4XjIFEZWQmCZi6Wv8BoxsDhRU3RVnLX04dToTDAEPlY= +golang.org/x/image v0.26.0/go.mod h1:lcxbMFAovzpnJxzXS3nyL83K27tmqtKzIJpctK8YO5c= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= +golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod 
h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net 
v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= -golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= -golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= +golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= +golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.29.0 h1:WdYw2tdTK1S8olAzWHdgeqfy+Mtm9XNhv/xJsY65d98= +golang.org/x/oauth2 v0.29.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= +golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -136,8 +408,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= -golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= +golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -147,7 +419,10 @@ golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= golang.org/x/term v0.17.0/go.mod 
h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= @@ -155,20 +430,105 @@ golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= +golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools 
v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools 
v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= +google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.229.0 h1:p98ymMtqeJ5i3lIBMj5MpR9kzIIgzpHHh8vQ+vgAzx8= 
+google.golang.org/api v0.229.0/go.mod h1:wyDfmq5g1wYJWn29O22FDWN48P7Xcz0xz+LBpptYvB0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto 
v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20250414145226-207652e42e2e h1:mYHFv3iX85YMwhGSaZS4xpkM8WQDmJUovz7yqsFrwDk= +google.golang.org/genproto v0.0.0-20250414145226-207652e42e2e/go.mod h1:TQT1YpH/rlDCS5+EuFaqPIMqDfuNMFR1OI8EcZJGgAk= +google.golang.org/genproto/googleapis/api v0.0.0-20250414145226-207652e42e2e h1:UdXH7Kzbj+Vzastr5nVfccbmFsmYNygVLSPk1pEfDoY= +google.golang.org/genproto/googleapis/api v0.0.0-20250414145226-207652e42e2e/go.mod h1:085qFyf2+XaZlRdCgKNCIZ3afY2p4HHZdoIRpId8F4A= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e h1:ztQaXfzEXTmCBvbtWYRhJxW+0iJcz2qXfd38/e9l7bA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.71.1 h1:ffsFWr7ygTUscGPI0KKK6TLrGz0476KUvvsbqWK0rPI= +google.golang.org/grpc v1.71.1/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= 
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= +rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= +rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/pkg/analyzer/image.go b/pkg/analyzer/image.go new file mode 100644 index 00000000..b8f8e690 --- /dev/null +++ b/pkg/analyzer/image.go @@ -0,0 +1,389 @@ +package analyzer + +import ( + "bytes" + "context" + "crypto/md5" + "crypto/sha256" + "encoding/base64" + "encoding/binary" + "image" + "image/gif" + _ "image/png" + "io" + 
"io/fs" + "time" + + "github.com/azr/phash" + "github.com/bbrks/go-blurhash" + "github.com/disintegration/imaging" + "github.com/kettek/apng" + "github.com/pkg/errors" + "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" + "go4.org/media/heif" + "golang.org/x/exp/slices" + "golang.org/x/image/riff" + "golang.org/x/image/webp" +) + +const blurHashAlgorithm = "https://blurha.sh" + +type imageProperties struct { + Size uint64 + ModTime time.Time + Width uint32 + Height uint32 + Animated bool + Hashes struct { + Sha256 []byte + Md5 []byte + PhashDCT []byte + BlurHash string + } +} + +func (p *imageProperties) EnhanceLink(link *manifest.Link) { + link.Height = uint(p.Height) + link.Width = uint(p.Width) + link.Size = uint(p.Size) + + hashes := make(manifest.HashList, 0, 4) + if link.Properties == nil { + link.Properties = manifest.Properties{} + } else if existingHashes := link.Properties.Hash(); len(existingHashes) > 0 { + hashes = existingHashes + } + + if len(p.Hashes.Sha256) > 0 { + hashes = append(hashes, manifest.HashValue{ + Algorithm: manifest.HashAlgorithmSHA256, + Value: base64.StdEncoding.EncodeToString(p.Hashes.Sha256), + }) + } + if len(p.Hashes.Md5) > 0 { + hashes = append(hashes, manifest.HashValue{ + Algorithm: manifest.HashAlgorithmMD5, + Value: base64.StdEncoding.EncodeToString(p.Hashes.Md5), + }) + } + if len(p.Hashes.PhashDCT) > 0 { + hashes = append(hashes, manifest.HashValue{ + Algorithm: manifest.HashAlgorithmPhashDCT, + Value: base64.StdEncoding.EncodeToString(p.Hashes.PhashDCT), + }) + } + if len(p.Hashes.BlurHash) > 0 { + hashes = append(hashes, manifest.HashValue{ + Algorithm: blurHashAlgorithm, + Value: p.Hashes.BlurHash, + }) + } + hashes.Deduplicate() + + link.Properties["hash"] = hashes + link.Properties["animated"] = p.Animated +} + +func hasVisualAlgorithm(hashes []manifest.HashAlgorithm) bool { + visualHash := false + for _, hash := range hashes { + switch hash { + case 
manifest.HashAlgorithmPhashDCT, blurHashAlgorithm: + visualHash = true + default: + continue + } + if visualHash { + break + } + } + return visualHash +} + +// Image inspects an image located in the provided filesystem, using the provided link's [manifest.HREF] +// as a path. Additional properties from the link, such as the [mediatype.MediaType], may be used, and should +// be included. A copy of the provided link will be returned, with the `size`, `width`, `height` and +// `properties.animated` attributes set. A slice of [manifest.HashAlgorithm] can be provided, in which case +// the returned link will also have `properties.hash` set with the computed hashes. Currently, the supported +// algorithms are: [manifest.HashAlgorithmSHA256], [manifest.HashAlgorithmMD5], [manifest.HashAlgorithmPhashDCT], +// and `https://blurha.sh` (BlurHash). The latter two are visual hashes, which are more computationally expensive. +func Image(system fs.FS, link manifest.Link, algorithms []manifest.HashAlgorithm) (*manifest.Link, error) { + path := link.Href.String() + file, err := system.Open(path) + if err != nil { + return nil, err + } + defer file.Close() + + reopen := func() error { + if of, ok := file.(io.ReadSeeker); ok { + of.Seek(0, 0) + } else { + file, err = system.Open(path) + if err != nil { + return err + } + } + return nil + } + + stat, err := file.Stat() + if err != nil { + return nil, err + } + if stat.IsDir() { + return nil, errors.New("must be a file, not a directory") + } + + p := &imageProperties{ + Size: uint64(stat.Size()), + ModTime: stat.ModTime(), + } + if p.Size == 0 { + return nil, errors.New("file is empty") + } + + var mt *mediatype.MediaType + if link.MediaType != nil { + mt = link.MediaType + } else { + mt = mediatype.OfFileOnly(context.TODO(), file) + if mt == nil { + return nil, errors.New("file has unknown media type") + } + } + if !mt.IsBitmap() { + return nil, errors.New("file is not a bitmap image") + } + // Reopen because the sniffer may have read 
the file + err = reopen() + if err != nil { + return nil, errors.Wrap(err, "failed reopening file") + } + + // Gather image width/height, and weed out unsuppored formats + var iconfig image.Config + if mt.Equal(&mediatype.AVIF) { + var hf *heif.File + if of, ok := file.(io.ReaderAt); ok { + hf = heif.Open(of) + } else { + // Fall back to reading the file into memory + stat, err := file.Stat() + if err != nil { + return nil, errors.Wrap(err, "failed statting AVIF file") + } + buf := make([]byte, stat.Size()) + _, err = io.ReadFull(file, buf) + if err != nil { + return nil, errors.Wrap(err, "failed reading AVIF file into memory") + } + hf = heif.Open(bytes.NewReader(buf)) + } + pi, err := hf.PrimaryItem() + if err != nil { + return nil, errors.Wrap(err, "failed decoding supposed AVIF file metadata") + } + w, h, ok := pi.VisualDimensions() + if !ok { + return nil, errors.New("failed reading AVIF image dimensions") + } + iconfig.Width = w + iconfig.Height = h + } else if mt.Equal(&mediatype.JXL) { + magicBytes := make([]byte, 12) + _, err = io.ReadFull(file, magicBytes) + if err != nil { + return nil, errors.Wrap(err, "failed reading JXL file for magic numbers") + } + jxlCodestream := []byte{0xFF, 0x0A} + jxlBmff := []byte{0x00, 0x00, 0x00, 0x0C, 0x4A, 0x58, 0x4C, 0x20, 0x0D, 0x0A, 0x87, 0x0A} + if !bytes.Equal(magicBytes[:2], jxlCodestream) && !bytes.Equal(magicBytes, jxlBmff) { + return nil, errors.New("supposed JXL file is invalid") + } + return nil, errors.New("JXL file format is currently unsupported") + } else { + var format string + iconfig, format, err = image.DecodeConfig(file) + if err != nil { + return nil, errors.Wrap(err, "failed decoding image metadata") + } + + // Special case for animated PNG which gets registered by the apng package + if format == "apng" { + if !mt.Equal(&mediatype.PNG) { + return nil, errors.New("file mediatype not equal to decoded image format") + } + } else { + imt := mediatype.OfExtension(format) + if imt == nil { + return nil, 
errors.New("failed determining mediatype from image format \"" + format + "\"") + } + if !mt.Equal(imt) { + return nil, errors.New("file mediatype not equal to decoded image format") + } + } + } + p.Width = uint32(iconfig.Width) + p.Height = uint32(iconfig.Height) + if p.Width == 0 || p.Height == 0 { + return nil, errors.New("image has zero width or height") + } + + // Decoder the image so the animation can be checked, and the perceptual hash calculated + err = reopen() + if err != nil { + return nil, errors.Wrap(err, "failed reopening file") + } + visualHash := hasVisualAlgorithm(algorithms) + hashVisually := func(img image.Image) { + if !visualHash { + return + } + // First downsize the image because: + // - Phash/DCT already does this, down to 32x32px + // - Blurhash encoding with a large image is very slow + if img.Bounds().Dx() > 128 { + img = imaging.Resize(img, 128, 0, imaging.Lanczos) + } + + if slices.Contains(algorithms, manifest.HashAlgorithmPhashDCT) { + // Create phash and put it in a byte array + p.Hashes.PhashDCT = make([]byte, 8) + binary.BigEndian.PutUint64(p.Hashes.PhashDCT, phash.DTC(img)) + } + if slices.Contains(algorithms, blurHashAlgorithm) { + // Create the blurhash + blurhash, _ := blurhash.Encode(5, 5, img) + p.Hashes.BlurHash = blurhash + } + } + if mt.Equal(&mediatype.GIF) { + gi, err := gif.DecodeAll(file) + if err != nil { + return nil, errors.Wrap(err, "failed decoding GIF file") + } + if len(gi.Image) > 1 { + p.Animated = true + } + hashVisually(gi.Image[0]) + } else if mt.Equal(&mediatype.PNG) { + pi, err := apng.DecodeAll(file) + if err != nil { + return nil, errors.Wrap(err, "failed decoding (A)PNG file") + } + if len(pi.Frames) > 1 { + p.Animated = true + } + hashVisually(pi.Frames[0].Image) + } else if mt.Equal(&mediatype.AVIF) { + // Not sure how to determine if an AVIF is animated. 
Very rare + if visualHash { + return nil, errors.New("AVIF perceptual hash is not yet supported") + } + } else if mt.Equal(&mediatype.WEBP) { + var wi image.Image + if _, ok := file.(io.ReadSeeker); ok { + p.Animated, err = isWEBPAnimated(file) + if err != nil { + return nil, errors.Wrap(err, "failed checking if WEBP file is animated") + } + if visualHash { + if p.Animated { + return nil, errors.New("perceptual hash of animated WEBP is not yet supported") + } + err = reopen() + if err != nil { + return nil, errors.Wrap(err, "failed reopening file") + } + wi, err = webp.Decode(file) + } + } else { + // Only read the file once into memory since we need to read it two times in a row + buf := make([]byte, p.Size) + _, err = io.ReadFull(file, buf) + if err != nil { + return nil, errors.Wrap(err, "failed reading WEBP file into memory") + } + r := bytes.NewReader(buf) + p.Animated, err = isWEBPAnimated(r) + if err != nil { + return nil, errors.Wrap(err, "failed checking if WEBP file is animated") + } + if visualHash { + if p.Animated { + return nil, errors.New("perceptual hash of animated WEBP is not yet supported") + } + r.Seek(0, 0) + wi, err = webp.Decode(r) + } + } + if err != nil { + return nil, errors.Wrap(err, "failed decoding WEBP file") + } + if visualHash { + hashVisually(wi) + } + } else if visualHash { + // Any other format can be generically decoded since it doesn't support animation + img, _, err := image.Decode(file) + if err != nil { + return nil, errors.Wrap(err, "failed decoding image file") + } + hashVisually(img) + } + + // Now compute the cryptographic hashes + err = reopen() + if err != nil { + return nil, errors.Wrap(err, "failed reopening file") + } + + // TODO: rewrite more cleanly + s2hash := sha256.New() + mdhash := md5.New() + if slices.Contains(algorithms, manifest.HashAlgorithmSHA256) && slices.Contains(algorithms, manifest.HashAlgorithmMD5) { + mw := io.MultiWriter(s2hash, mdhash) + if _, err := io.Copy(mw, file); err != nil { + return nil, 
errors.Wrap(err, "failed computing SHA256 and MD5 hashes") + } + p.Hashes.Sha256 = s2hash.Sum(nil) + p.Hashes.Md5 = mdhash.Sum(nil) + } else { + if slices.Contains(algorithms, manifest.HashAlgorithmSHA256) { + if _, err := io.Copy(s2hash, file); err != nil { + return nil, errors.Wrap(err, "failed computing SHA256 hash") + } + p.Hashes.Sha256 = s2hash.Sum(nil) + } + if slices.Contains(algorithms, manifest.HashAlgorithmMD5) { + if _, err := io.Copy(mdhash, file); err != nil { + return nil, errors.Wrap(err, "failed computing MD5 hash") + } + p.Hashes.Md5 = mdhash.Sum(nil) + } + } + + p.EnhanceLink(&link) + return &link, nil +} + +func isWEBPAnimated(file io.Reader) (bool, error) { + _, data, err := riff.NewReader(file) + if err != nil { + return false, errors.Wrap(err, "failed reading RIFF data from WEBP file") + } + id, _, _, err := data.Next() + var frames uint32 + for err == nil { + if id == riff.FourCC([4]byte{'A', 'N', 'M', 'F'}) { + frames++ + } + id, _, _, err = data.Next() + } + if err != io.EOF { + return false, errors.Wrap(err, "failed reading RIFF chunks from WEBP file") + } + return frames > 1, nil +} diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index e4f68300..68a44375 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -1,15 +1,22 @@ package archive import ( + "context" "errors" "io" "os" + + "github.com/readium/go-toolkit/pkg/util/url" ) type ArchiveFactory interface { - Open(filepath string, password string) (Archive, error) // Opens an archive from a local [file]. - OpenBytes(data []byte, password string) (Archive, error) // Opens an archive from a [data] slice. - OpenReader(reader ReaderAtCloser, size int64, password string, minimizeReads bool) (Archive, error) // Opens an archive from a reader. + Open(ctx context.Context, location url.URL, password string) (Archive, error) // Opens an archive from a location. 
+ OpenBytes(ctx context.Context, data []byte, password string) (Archive, error) // Opens an archive from a [data] slice. + OpenReader(ctx context.Context, reader ReaderAtCloser, size int64, password string, minimizeReads bool) (Archive, error) // Opens an archive from a reader. +} + +type SchemeSpecificArchiveFactory interface { + CanOpen(url.Scheme) bool // Whether this factory can open the given scheme. } type DefaultArchiveFactory struct { @@ -18,20 +25,25 @@ type DefaultArchiveFactory struct { } // Open implements ArchiveFactory -func (e DefaultArchiveFactory) Open(filepath string, password string) (Archive, error) { - st, err := os.Stat(filepath) +func (e DefaultArchiveFactory) Open(ctx context.Context, location url.URL, password string) (Archive, error) { + u := url.BaseFile.Resolve(location).(url.AbsoluteURL) + if u.Scheme() != url.SchemeFile { + return nil, errors.New("unsupported scheme " + u.Scheme().String()) + } + + st, err := os.Stat(u.Path()) if err != nil { return nil, err } if st.IsDir() { - return e.explodedFactory.Open(filepath, password) + return e.explodedFactory.Open(u.Path(), password) } else { - return e.gozipFactory.Open(filepath, password) + return e.gozipFactory.Open(u.Path(), password) } } // OpenBytes implements ArchiveFactory -func (e DefaultArchiveFactory) OpenBytes(data []byte, password string) (Archive, error) { +func (e DefaultArchiveFactory) OpenBytes(ctx context.Context, data []byte, password string) (Archive, error) { if data == nil { return nil, errors.New("archive is nil") } @@ -39,13 +51,18 @@ func (e DefaultArchiveFactory) OpenBytes(data []byte, password string) (Archive, } // OpenBytes implements ArchiveFactory -func (e DefaultArchiveFactory) OpenReader(reader ReaderAtCloser, size int64, password string, minimizeReads bool) (Archive, error) { +func (e DefaultArchiveFactory) OpenReader(ctx context.Context, reader ReaderAtCloser, size int64, password string, minimizeReads bool) (Archive, error) { if reader == nil { return nil, 
errors.New("archive is nil") } return e.gozipFactory.OpenReader(reader, size, password, minimizeReads) } +// CanOpenScheme reports whether the given scheme is the file scheme. NOTE(review): SchemeSpecificArchiveFactory declares CanOpen, not CanOpenScheme, so this method does not satisfy that interface as written - confirm the intended name. +func (e DefaultArchiveFactory) CanOpenScheme(scheme url.Scheme) bool { + return scheme == url.SchemeFile +} + func NewArchiveFactory() DefaultArchiveFactory { return DefaultArchiveFactory{} } @@ -63,7 +80,6 @@ type Entry interface { StreamCompressedGzip(w io.Writer) (int64, error) // Streams the compressed content of this entry to a writer in a GZIP container. ReadCompressed() ([]byte, error) // Reads the compressed content of this entry. ReadCompressedGzip() ([]byte, error) // Reads the compressed content of this entry inside a GZIP container. - } // Represents an immutable archive. diff --git a/pkg/archive/archive_exploded.go b/pkg/archive/archive_exploded.go index 1f1a76bc..e94478c7 100644 --- a/pkg/archive/archive_exploded.go +++ b/pkg/archive/archive_exploded.go @@ -55,10 +55,7 @@ func (e explodedArchiveEntry) Read(start int64, end int64) ([]byte, error) { } data := make([]byte, end-start+1) n, err := f.Read(data) - if err != nil { - return nil, err - } - return data[:n], nil + return data[:n], err } func (e explodedArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64, error) { @@ -107,10 +104,12 @@ type explodedArchive struct { directory string // Directory, already cleaned! 
} +// Close implements Archive func (a explodedArchive) Close() { // Nothing needs to be done } +// Entries implements Archive func (a explodedArchive) Entries() []Entry { entries := make([]Entry, 0) filepath.WalkDir(a.directory, func(path string, d fs.DirEntry, err error) error { @@ -131,6 +130,7 @@ func (a explodedArchive) Entries() []Entry { return entries } +// Entry implements Archive func (a explodedArchive) Entry(path string) (Entry, error) { if !fs.ValidPath(path) { return nil, fs.ErrNotExist @@ -159,6 +159,7 @@ func NewExplodedArchive(directory string) Archive { type explodedArchiveFactory struct{} +// Open implements ArchiveFactory func (e explodedArchiveFactory) Open(filepath string, password string) (Archive, error) { st, err := os.Stat(filepath) if err != nil { diff --git a/pkg/archive/archive_gcs.go b/pkg/archive/archive_gcs.go new file mode 100644 index 00000000..c32aa06f --- /dev/null +++ b/pkg/archive/archive_gcs.go @@ -0,0 +1,102 @@ +package archive + +import ( + "archive/zip" + "context" + "io" + + "cloud.google.com/go/storage" + "github.com/pkg/errors" + "github.com/readium/go-toolkit/pkg/util/url" +) + +type GCSArchiveFactory struct { + client *storage.Client + config RemoteArchiveConfig +} + +// Open implements ArchiveFactory +func (e GCSArchiveFactory) Open(ctx context.Context, location url.URL, password string) (Archive, error) { + // Go's built-in zip reader doesn't support passwords. 
+ if password != "" { + return nil, errors.New("password-protected archives not supported") + } + + absLocation, ok := location.(url.AbsoluteURL) + if !ok { + return nil, errors.New("GCS archive location is not an absolute URL") + } + handle, err := absLocation.ToGSObject(e.client) + if err != nil { + return nil, errors.Wrap(err, "invalid GCS archive location") + } + + // Get object attributes + attrs, err := handle.Attrs(ctx) + if err != nil { + return nil, errors.Wrap(err, "failed to get GCS archive's attributes") + } + + // Setup remote ZIP archive reading + rdr := newRemoteZIPAdapter(RemoteArchiveReaderFromGCS(handle, attrs), e.config) + r, err := zip.NewReader(rdr, attrs.Size) + if err != nil { + return nil, err + } + rdr.makeReady() + + return &gozipArchive{ + zip: r, + minimizeReads: true, + closer: rdr.Close, + }, nil +} + +// CanOpen implements SchemeSpecificArchiveFactory +func (e GCSArchiveFactory) CanOpen(scheme url.Scheme) bool { + return scheme == url.SchemeGS +} + +// OpenBytes implements ArchiveFactory +func (e GCSArchiveFactory) OpenBytes(ctx context.Context, data []byte, password string) (Archive, error) { + return nil, errors.New("GCS archives must be opened with Open") +} + +// OpenReader implements ArchiveFactory +func (e GCSArchiveFactory) OpenReader(ctx context.Context, reader ReaderAtCloser, size int64, password string, minimizeReads bool) (Archive, error) { + return nil, errors.New("GCS archives must be opened with Open") +} + +func NewGCSArchiveFactory(client *storage.Client, config RemoteArchiveConfig) GCSArchiveFactory { + return GCSArchiveFactory{ + client: client, + config: config, + } +} + +// GCS-specific reader +type remoteGCSReader struct { + handle *storage.ObjectHandle + attrs *storage.ObjectAttrs +} + +func (r remoteGCSReader) ReadRange(ctx context.Context, offset, length int64) (io.ReadCloser, error) { + rdr, err := r.handle.NewRangeReader(ctx, offset, length) + if err != nil { + return nil, err + } + + // User is responsible 
for closing the reader + return rdr, nil +} + +func (r remoteGCSReader) Size() int64 { + return r.attrs.Size +} + +func RemoteArchiveReaderFromGCS(handle *storage.ObjectHandle, attrs *storage.ObjectAttrs) RemoteArchiveReader { + return &remoteGCSReader{ + handle: handle, + attrs: attrs, + } +} diff --git a/pkg/archive/archive_http.go b/pkg/archive/archive_http.go new file mode 100644 index 00000000..e10517f5 --- /dev/null +++ b/pkg/archive/archive_http.go @@ -0,0 +1,143 @@ +package archive + +import ( + "archive/zip" + "context" + "io" + "net/http" + "slices" + "strconv" + "strings" + + "github.com/pkg/errors" + "github.com/readium/go-toolkit/pkg/util/url" +) + +type HTTPArchiveFactory struct { + client *http.Client + config RemoteArchiveConfig +} + +// Open implements ArchiveFactory +func (e HTTPArchiveFactory) Open(ctx context.Context, location url.URL, password string) (Archive, error) { + // Go's built-in zip reader doesn't support passwords. + if password != "" { + return nil, errors.New("password-protected archives not supported") + } + + absLocation, ok := location.(url.AbsoluteURL) + if !ok { + return nil, errors.New("HTTP archive location is not an absolute URL") + } + + req, err := http.NewRequestWithContext(ctx, http.MethodHead, absLocation.String(), nil) + if err != nil { + return nil, err + } + resp, err := e.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + // If it's not code 200, the file doesn't exist + if resp.StatusCode != http.StatusOK { + return nil, errors.Errorf("HTTP HEAD request failed with status code: %d", resp.StatusCode) + } + + // HTTP server *must* support byte range requests + arvs := resp.Header.Values("Accept-Ranges") + if !slices.Contains(arvs, "bytes") { + return nil, errors.New("HTTP server does not support byte range requests") + } + + // HTTP server *must* return Content-Length header + if resp.ContentLength <= 0 { + return nil, errors.New("HTTP server returned zero content length") + } + + 
// Setup remote ZIP archive reading + rdr := newRemoteZIPAdapter(RemoteArchiveReaderFromHTTP(e.client, absLocation, resp.ContentLength), e.config) + r, err := zip.NewReader(rdr, resp.ContentLength) + if err != nil { + return nil, err + } + rdr.makeReady() + + return &gozipArchive{ + zip: r, + minimizeReads: true, + closer: rdr.Close, + }, nil +} + +// CanOpen implements SchemeSpecificArchiveFactory +func (e HTTPArchiveFactory) CanOpen(scheme url.Scheme) bool { + return scheme == url.SchemeHTTP || scheme == url.SchemeHTTPS +} + +// OpenBytes implements ArchiveFactory +func (e HTTPArchiveFactory) OpenBytes(ctx context.Context, data []byte, password string) (Archive, error) { + return nil, errors.New("HTTP archives must be opened with Open") +} + +// OpenReader implements ArchiveFactory +func (e HTTPArchiveFactory) OpenReader(ctx context.Context, reader ReaderAtCloser, size int64, password string, minimizeReads bool) (Archive, error) { + return nil, errors.New("HTTP archives must be opened with Open") +} + +func NewHTTPArchiveFactory(client *http.Client, config RemoteArchiveConfig) HTTPArchiveFactory { + return HTTPArchiveFactory{ + client: client, + config: config, + } +} + +// HTTP-specific reader +type remoteHTTPReader struct { + client *http.Client + url string + size int64 +} + +func (r remoteHTTPReader) ReadRange(ctx context.Context, offset, length int64) (io.ReadCloser, error) { + if offset < 0 { + return nil, io.EOF + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, r.url, nil) + if err != nil { + return nil, err + } + var sb strings.Builder + sb.WriteString("bytes=") + sb.WriteString(strconv.FormatInt(offset, 10)) + sb.WriteString("-") + if length > 0 { + sb.WriteString(strconv.FormatInt(offset+length-1, 10)) + } + req.Header.Set("Range", sb.String()) + + resp, err := r.client.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != http.StatusPartialContent { + return nil, errors.New("unexpected HTTP status code: " + 
strconv.Itoa(resp.StatusCode)) + } + + // User is responsible for closing the body + return resp.Body, nil +} + +func (r remoteHTTPReader) Size() int64 { + return r.size +} + +func RemoteArchiveReaderFromHTTP(client *http.Client, url url.AbsoluteURL, size int64) RemoteArchiveReader { + return &remoteHTTPReader{ + client: client, + url: url.String(), + size: size, + } +} diff --git a/pkg/archive/archive_s3.go b/pkg/archive/archive_s3.go new file mode 100644 index 00000000..03688f3f --- /dev/null +++ b/pkg/archive/archive_s3.go @@ -0,0 +1,125 @@ +package archive + +import ( + "archive/zip" + "context" + "io" + "strconv" + "strings" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/pkg/errors" + "github.com/readium/go-toolkit/pkg/util/url" +) + +type S3ArchiveFactory struct { + client *s3.Client + config RemoteArchiveConfig +} + +// Open implements ArchiveFactory +func (e S3ArchiveFactory) Open(ctx context.Context, location url.URL, password string) (Archive, error) { + // Go's built-in zip reader doesn't support passwords. 
+ if password != "" { + return nil, errors.New("password-protected archives not supported") + } + + absLocation, ok := location.(url.AbsoluteURL) + if !ok { + return nil, errors.New("S3 archive location is not an absolute URL") + } + input, err := absLocation.ToS3Object() + if err != nil { + return nil, errors.Wrap(err, "invalid S3 archive location") + } + + // Get object attributes + output, err := e.client.HeadObject(ctx, &s3.HeadObjectInput{ + Bucket: input.Bucket, + Key: input.Key, + }) + if err != nil { + return nil, errors.Wrap(err, "failed to get S3 archive's attributes") + } + + // Setup remote ZIP archive reading + rdr := newRemoteZIPAdapter(RemoteArchiveReaderFromS3(e.client, *output, *input), e.config) + r, err := zip.NewReader(rdr, *output.ContentLength) + if err != nil { + return nil, err + } + rdr.makeReady() + + return &gozipArchive{ + zip: r, + minimizeReads: true, + closer: rdr.Close, + }, nil +} + +// CanOpen implements SchemeSpecificArchiveFactory +func (e S3ArchiveFactory) CanOpen(scheme url.Scheme) bool { + return scheme == url.SchemeS3 +} + +// OpenBytes implements ArchiveFactory +func (e S3ArchiveFactory) OpenBytes(ctx context.Context, data []byte, password string) (Archive, error) { + return nil, errors.New("S3 archives must be opened with Open") +} + +// OpenReader implements ArchiveFactory +func (e S3ArchiveFactory) OpenReader(ctx context.Context, reader ReaderAtCloser, size int64, password string, minimizeReads bool) (Archive, error) { + return nil, errors.New("S3 archives must be opened with Open") +} + +func NewS3ArchiveFactory(client *s3.Client, config RemoteArchiveConfig) S3ArchiveFactory { + return S3ArchiveFactory{ + client: client, + config: config, + } +} + +// S3-specific reader +type remoteS3Reader struct { + client *s3.Client + input s3.GetObjectInput + head s3.HeadObjectOutput +} + +func (r remoteS3Reader) ReadRange(ctx context.Context, offset, length int64) (io.ReadCloser, error) { + if offset < 0 { + return nil, io.EOF + } + 
+ var sb strings.Builder + sb.WriteString("bytes=") + sb.WriteString(strconv.FormatInt(offset, 10)) + sb.WriteString("-") + if length >= 0 { + sb.WriteString(strconv.FormatInt(offset+length-1, 10)) + } + r.input.Range = aws.String(sb.String()) + result, err := r.client.GetObject(ctx, &r.input) + if err != nil { + return nil, err + } + + // User is responsible for closing the body + return result.Body, nil +} + +func (r remoteS3Reader) Size() int64 { + if r.head.ContentLength != nil { + return *r.head.ContentLength + } + return 0 +} + +func RemoteArchiveReaderFromS3(client *s3.Client, output s3.HeadObjectOutput, input s3.GetObjectInput) RemoteArchiveReader { + return &remoteS3Reader{ + client: client, + input: input, + head: output, + } +} diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go index 75a4faad..fe25a4da 100644 --- a/pkg/archive/archive_test.go +++ b/pkg/archive/archive_test.go @@ -4,6 +4,7 @@ import ( "bytes" "testing" + "github.com/readium/go-toolkit/pkg/util/url" "github.com/stretchr/testify/assert" ) @@ -26,7 +27,8 @@ var entryList = []string{ func withArchives(t *testing.T, callback func(archive Archive)) { for _, archivePath := range archives { t.Log(archivePath) - archive, err := DefaultArchiveFactory{}.Open(archivePath, "") + u, _ := url.FromFilepath(archivePath) + archive, err := DefaultArchiveFactory{}.Open(t.Context(), u, "") assert.NoError(t, err) callback(archive) } diff --git a/pkg/archive/archive_zip.go b/pkg/archive/archive_zip.go index 6eb05ead..d0968cd7 100644 --- a/pkg/archive/archive_zip.go +++ b/pkg/archive/archive_zip.go @@ -97,12 +97,13 @@ func (e gozipArchiveEntry) Read(start int64, end int64) ([]byte, error) { return nil, err } } - data := make([]byte, min(end-start+1, int64(e.file.UncompressedSize64))) - _, err = io.ReadFull(f, data) - if err != nil { - return nil, err + data := make([]byte, end-start+1) + n, err := f.Read(data) + if n > 0 && err == io.EOF { + // Not EOF error if some data was read + err = nil 
} - return data, nil + return data[:n], err } func (e gozipArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64, error) { @@ -149,10 +150,11 @@ func (e gozipArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64, e } } n, err := io.CopyN(w, f, end-start+1) - if err != nil && err != io.EOF { - return n, err + if n > 0 && err == io.EOF { + // Not EOF error if some data was read + err = nil } - return n, nil + return n, err } func (e gozipArchiveEntry) StreamCompressed(w io.Writer) (int64, error) { @@ -263,10 +265,12 @@ type gozipArchive struct { minimizeReads bool } +// Close implements Archive func (a *gozipArchive) Close() { a.closer() } +// Entries implements Archive func (a *gozipArchive) Entries() []Entry { entries := make([]Entry, 0, len(a.zip.File)) for _, f := range a.zip.File { @@ -287,6 +291,7 @@ func (a *gozipArchive) Entries() []Entry { return entries } +// Entry implements Archive func (a *gozipArchive) Entry(p string) (Entry, error) { if !fs.ValidPath(p) { return nil, fs.ErrNotExist @@ -323,6 +328,7 @@ func NewGoZIPArchive(zip *zip.Reader, closer func() error, minimizeReads bool) A type gozipArchiveFactory struct{} +// Open implements ArchiveFactory func (e gozipArchiveFactory) Open(filepath string, password string) (Archive, error) { // Go's built-in zip reader doesn't support passwords. if password != "" { @@ -336,6 +342,7 @@ func (e gozipArchiveFactory) Open(filepath string, password string) (Archive, er return NewGoZIPArchive(&rc.Reader, rc.Close, false), nil } +// OpenBytes implements ArchiveFactory func (e gozipArchiveFactory) OpenBytes(data []byte, password string) (Archive, error) { // Go's built-in zip reader doesn't support passwords. 
if password != "" { @@ -354,6 +361,7 @@ type ReaderAtCloser interface { io.ReaderAt } +// OpenReader implements ArchiveFactory func (e gozipArchiveFactory) OpenReader(reader ReaderAtCloser, size int64, password string, minimizeReads bool) (Archive, error) { // Go's built-in zip reader doesn't support passwords. if password != "" { diff --git a/pkg/archive/remote.go b/pkg/archive/remote.go new file mode 100644 index 00000000..06478361 --- /dev/null +++ b/pkg/archive/remote.go @@ -0,0 +1,342 @@ +package archive + +import ( + "archive/zip" + "bytes" + "context" + "encoding/binary" + "io" + "sync" + "time" + + "github.com/pkg/errors" +) + +type RemoteArchiveConfig struct { + Timeout time.Duration // Timeout for remote requests to read from the archive + CacheAllThreshold int64 // Threshold for caching the entire ZIP + CacheSizeThreshold int64 // Threshold for caching of a single entry in the ZIP + CacheCountThreshold int64 // Threshold for the number of entries in the ZIP to cache +} + +func (c RemoteArchiveConfig) Empty() bool { + return c.Timeout == 0 && c.CacheSizeThreshold == 0 && c.CacheCountThreshold == 0 && c.CacheAllThreshold == 0 +} + +func NewDefaultRemoteArchiveConfig() RemoteArchiveConfig { + return RemoteArchiveConfig{ + Timeout: time.Second * 60, // 1 minute + CacheSizeThreshold: 1024 * 1024, // 1MB + CacheCountThreshold: 32, // 32 items + CacheAllThreshold: 1024 * 1024, // 1MB + } +} + +type RemoteArchiveReader interface { + Size() int64 // Size of the remote archive object + ReadRange(ctx context.Context, offset, length int64) (io.ReadCloser, error) // Negative length means "read to the end" +} + +type readRange struct { + HeaderOffset int64 // Offset of the local file header + Offset int64 // Offset of the file body + Size int64 // Size of the file body in the archive + Header [30]byte // Local file header + Data []byte // File body +} + +// Read ZIP archives from the a remote location efficiently +type remoteZIPAdapter struct { + rdr 
RemoteArchiveReader // Remote archive reader + zipReady bool // Is the ZIP file opened by Go's zip reader? + timeout time.Duration // Timeout for remote requests to read from the archive + + cacheAllThreshold int64 // Threshold for caching the entire ZIP + cacheSizeThreshold int64 // Threshold for caching of a single entry in the ZIP + cacheCountThreshold int64 // Threshold for the number of entries in the ZIP to cache + cachedRanges []readRange // Cached byte ranges of the ZIP file + cacheMutex sync.RWMutex // Mutex for the cached ranges + completeBytes []byte // Entire ZIP file in memory + + // No mutex here, because it's only set once during the ZIP opening procedure + zipTail []byte + zipTailSize int64 +} + +func (r *remoteZIPAdapter) cacheAll() bool { + return r.rdr.Size() <= r.cacheAllThreshold +} + +// ReadAt implements io.ReaderAt +func (r *remoteZIPAdapter) ReadAt(p []byte, off int64) (int, error) { + if off < 0 { + return 0, errors.New("read negative offset") + } + + if len(p) == 0 { + return 0, errors.New("read into empty byte slice") + } + + // Limited amount of time to perform the read + ctx, cancel := context.WithTimeout(context.Background(), r.timeout) + defer cancel() + + if r.cacheAll() { // Read from a complete in-memory copy of the publication + if len(r.completeBytes) == 0 { + rdr, err := r.rdr.ReadRange(ctx, 0, r.rdr.Size()) + if err != nil { + return 0, err + } + defer rdr.Close() + r.completeBytes = make([]byte, r.rdr.Size()) + n, err := io.ReadFull(rdr, r.completeBytes) // Read the entire object into memory + if err != nil { + return n, err + } + } + // Perform ReadAt on the in-memory copy of the publication + return bytes.NewReader(r.completeBytes).ReadAt(p, off) + } + + // Special accommodation to speed up zip reader scanning the end of the file for the central directory + if !r.zipReady { + tailOffset := r.rdr.Size() - r.zipTailSize + newOff := off - tailOffset + + if newOff < 0 { + // The central directory is really long, we can't use 
the cached version + // Instead, we increase its size to include the requested offset + r.zipTail = nil + r.zipTailSize -= newOff + tailOffset = r.rdr.Size() - r.zipTailSize + newOff = off - tailOffset + } + if len(r.zipTail) > 0 { + n := copy(p, r.zipTail[newOff:newOff+int64(len(p))]) + return n, nil + } + newZipTail := make([]byte, r.zipTailSize) + + rdr, err := r.rdr.ReadRange(ctx, tailOffset, r.rdr.Size()) + if err != nil { + return 0, err + } + defer rdr.Close() + _, err = io.ReadFull(rdr, newZipTail) // Read tail of file into memory + if err != nil { + newZipTail = nil + return 0, err + } + n := copy(p, newZipTail[newOff:newOff+int64(len(p))]) + r.zipTail = newZipTail + return n, nil + } + + size := int64(len(p)) + var n int + + if size == 30 && r.cacheCountThreshold > 0 && r.cacheSizeThreshold > 0 { + // 30 bytes is the size of a ZIP's local file header + // There could theoretically be a real file with compressed or uncompressed length of 30 bytes, + // but this is not that likely in an EPUB. So this is a good enough heuristic to use. + + // First, check if we've already read this header as a shortcut + r.cacheMutex.RLock() + for _, rng := range r.cachedRanges { + if rng.HeaderOffset == off { + r.cacheMutex.RUnlock() + return copy(p, rng.Header[:]), nil + } + } + r.cacheMutex.RUnlock() + + // We start reading at the offset of the local file header, with the assumption that the actual + // file content follows right after. This way, we only need to start a read from the remote *one* time. + rdr, err := r.rdr.ReadRange(ctx, off, -1) + if err != nil { + return 0, err + } + + var fileHeaderBuf [30]byte + n, err = rdr.Read(fileHeaderBuf[:]) + if err != nil { + rdr.Close() + return 0, errors.Wrap(err, "failed reading local file header bytes") + } + if fileHeaderBuf[0] == 'P' && fileHeaderBuf[1] == 'K' && fileHeaderBuf[2] == 0x03 && fileHeaderBuf[3] == 0x04 { + // PK\x03\x04 is the signature of a ZIP's local file header. 
This confirms our suspicion that it's + // what it seems. The possibility of it being something else is very very low at this point. + + // Get compression method + compressionMethod := binary.LittleEndian.Uint16(fileHeaderBuf[8:]) + var bodySize uint32 + + b := fileHeaderBuf[18:] + + compressedSize := binary.LittleEndian.Uint32(b) + uncompressedSize := binary.LittleEndian.Uint32(b[4:]) + + if compressedSize == 0 && uncompressedSize == 0 { + // No file size given. It's not great, but it's technically still valid. + // Happens especially if the author of the ZIP is streaming the contents into it, + // e.g. with Go, where if you write a streaming ZIP, the size is not known in advance. + + // We can still at least cache the file header + r.cacheMutex.Lock() + if len(r.cachedRanges) >= int(r.cacheCountThreshold) { + // Remove the oldest range + r.cachedRanges = r.cachedRanges[1:] + } + + r.cachedRanges = append(r.cachedRanges, readRange{ + HeaderOffset: off, + Header: fileHeaderBuf, + }) + r.cacheMutex.Unlock() + } else if compressedSize == 0xFFFFFFFF && uncompressedSize == 0xFFFFFFFF { + // ZIP64 is not supported by this routine + } else { + if compressionMethod == zip.Store { + // File is uncompressed + bodySize = uncompressedSize + } else { + // File is compressed + bodySize = compressedSize + } + + // Now the important part - we precache the actual file! 
+ + // ...but only if it's not too big + if int64(bodySize) <= r.cacheSizeThreshold { + // Remaining local file headers are needed to get the total size of useless stuff + filenameLength := binary.LittleEndian.Uint16(b[8:]) + extraFieldLength := binary.LittleEndian.Uint16(b[10:]) + useless := int64(extraFieldLength) + int64(filenameLength) + bodyOffset := off + 30 + useless + + r.cacheMutex.RLock() + var hasSameRange bool + for _, rng := range r.cachedRanges { + if rng.Offset == bodyOffset && rng.Size == int64(bodySize) { + hasSameRange = true + break + } + } + r.cacheMutex.RUnlock() + if !hasSameRange { + // Allocate a slice to hold the filename, extra field and file body + rest := make([]byte, int64(bodySize)+useless) + _, err := io.ReadAtLeast(rdr, rest, len(rest)) + if err != nil { + rdr.Close() + return 0, errors.Wrap(err, "failed reading rest of zip file bytes for precaching") + } + + // Write to cache + r.cacheMutex.Lock() + if len(r.cachedRanges) >= int(r.cacheCountThreshold) { + // Remove the oldest range + r.cachedRanges = r.cachedRanges[1:] + } + + r.cachedRanges = append(r.cachedRanges, readRange{ + HeaderOffset: off, + Offset: bodyOffset, + Size: int64(bodySize), + Header: fileHeaderBuf, + Data: rest[useless:], // Trim off the filename and extra field, just store the body + }) + r.cacheMutex.Unlock() + } + } + } + } + copy(p, fileHeaderBuf[:]) // Copy the 30 read bytes + io.Copy(io.Discard, rdr) // Discard the rest of the read + rdr.Close() // Then close it + } else { + // Check all the cache ranges to see if what we're looking for is somewhere inside a cached range + // This is especially useful when doing a range read / stream of e.g. 
4096-byte chunks + r.cacheMutex.RLock() + for _, rng := range r.cachedRanges { + if off >= rng.Offset && off < rng.Offset+rng.Size && off+size <= rng.Offset+rng.Size { + // Found a range that contains the requested range + // Extract the relevant part of the range + n = copy(p, rng.Data[off-rng.Offset:off-rng.Offset+size]) + r.cacheMutex.RUnlock() + return n, nil + } + } + r.cacheMutex.RUnlock() + + // Cache miss, need to read a brand new range + rdr, err := r.rdr.ReadRange(ctx, off, size) + if err != nil { + return 0, err + } + defer rdr.Close() + + n, err = io.ReadFull(rdr, p) // Read range into containing slice + if err != nil { + return n, err + } + + if size > r.cacheSizeThreshold { + return n, nil // Too big to cache, just return + } + } + + // Write to cache + r.cacheMutex.Lock() + var hasSameRange bool + for _, rng := range r.cachedRanges { + if rng.Offset == off && rng.Size == size { + hasSameRange = true + break + } + } + if !hasSameRange { + if len(r.cachedRanges) >= int(r.cacheCountThreshold) { + // Remove the oldest range + r.cachedRanges = r.cachedRanges[1:] + } + + r.cachedRanges = append(r.cachedRanges, readRange{ + HeaderOffset: -1, + Offset: off, + Size: size, + Data: p, + }) + } + r.cacheMutex.Unlock() + + return n, nil +} + +func (r *remoteZIPAdapter) makeReady() { + r.zipReady = true + r.zipTail = nil +} + +func (r *remoteZIPAdapter) Close() error { + clear(r.cachedRanges) + return nil +} + +func newRemoteZIPAdapter(rdr RemoteArchiveReader, config RemoteArchiveConfig) *remoteZIPAdapter { + if config.Empty() { + config = NewDefaultRemoteArchiveConfig() + } + r := &remoteZIPAdapter{ + rdr: rdr, + timeout: config.Timeout, + cacheSizeThreshold: config.CacheSizeThreshold, + cacheCountThreshold: config.CacheCountThreshold, + cacheAllThreshold: config.CacheAllThreshold, + zipTailSize: 65 * 1024, // 65KB + } + if !r.cacheAll() { + r.cachedRanges = make([]readRange, 0, r.cacheCountThreshold) + } + return r +} diff --git a/pkg/asset/asset_file.go 
b/pkg/asset/asset_file.go index 8814b08e..21e68386 100644 --- a/pkg/asset/asset_file.go +++ b/pkg/asset/asset_file.go @@ -1,59 +1,67 @@ package asset import ( + "context" + "errors" "os" "path/filepath" + "github.com/readium/go-toolkit/pkg/archive" "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" ) // Represents a publication stored as a file on the local file system. type FileAsset struct { - filepath string + uri url.URL mediatype *mediatype.MediaType knownMediaType *mediatype.MediaType mediaTypeHint string } -func File(filepath string) *FileAsset { +func File(uri url.URL) *FileAsset { return &FileAsset{ - filepath: filepath, + uri: uri, } } // Creates a [FileAsset] from a [File] and an optional media type, when known. -func FileWithMediaType(filepath string, mediatype *mediatype.MediaType) *FileAsset { +func FileWithMediaType(uri url.URL, mediatype *mediatype.MediaType) *FileAsset { return &FileAsset{ - filepath: filepath, + uri: uri, knownMediaType: mediatype, } } // Creates a [FileAsset] from a [File] and an optional media type hint. // Providing a media type hint will improve performances when sniffing the media type. 
-func FileWithMediaTypeHint(filepath string, mediatypeHint string) *FileAsset { +func FileWithMediaTypeHint(uri url.URL, mediatypeHint string) *FileAsset { return &FileAsset{ - filepath: filepath, + uri: uri, mediaTypeHint: mediatypeHint, } } // Name implements PublicationAsset func (a *FileAsset) Name() string { - return filepath.Base(a.filepath) + return a.uri.Filename() +} + +func (a *FileAsset) realPath() string { + return filepath.ToSlash(a.uri.Path()) } // MediaType implements PublicationAsset -func (a *FileAsset) MediaType() mediatype.MediaType { +func (a *FileAsset) MediaType(ctx context.Context) mediatype.MediaType { if a.mediatype == nil { if a.knownMediaType != nil { a.mediatype = a.knownMediaType } else { - fil, err := os.Open(a.filepath) + fil, err := os.Open(a.realPath()) if err == nil { // No problem opening the file defer fil.Close() - a.mediatype = mediatype.OfFile(fil, []string{a.mediaTypeHint}, nil, mediatype.Sniffers) + a.mediatype = mediatype.OfFile(ctx, fil, []string{a.mediaTypeHint}, nil, mediatype.Sniffers) } if a.mediatype == nil { // Still nothing found a.mediatype = &mediatype.Binary @@ -64,19 +72,32 @@ func (a *FileAsset) MediaType() mediatype.MediaType { } // CreateFetcher implements PublicationAsset -func (a *FileAsset) CreateFetcher(dependencies Dependencies, credentials string) (fetcher.Fetcher, error) { - stat, err := os.Stat(a.filepath) +func (a *FileAsset) CreateFetcher(ctx context.Context, dependencies Dependencies, credentials string) (fetcher.Fetcher, error) { + if u, ok := a.uri.(url.AbsoluteURL); ok && !u.IsFile() { + return nil, errors.New("file asset with absolute URL must have file:/// scheme") + } + + rfp := a.realPath() + stat, err := os.Stat(rfp) if err != nil { return nil, err } if stat.IsDir() { - return fetcher.NewFileFetcher("", a.filepath), nil + return fetcher.NewFileFetcher("", rfp), nil } else { - af, err := fetcher.NewArchiveFetcherFromPathWithFactory(a.filepath, dependencies.ArchiveFactory) + factory, ok := 
dependencies.ArchiveFactory.(archive.SchemeSpecificArchiveFactory) + if ok { + // If we can narrow down the schemes the factory supports, we enforce it + if !factory.CanOpen(url.SchemeFile) { + return nil, errors.New("provided ArchiveFactory does not support file scheme") + } + } + + af, err := fetcher.NewArchiveFetcherFromPathWithFactory(ctx, rfp, dependencies.ArchiveFactory) if err == nil { return af, nil } - return fetcher.NewFileFetcher(a.Name(), a.filepath), nil + return fetcher.NewFileFetcher(a.Name(), rfp), nil } } diff --git a/pkg/asset/asset_gcs.go b/pkg/asset/asset_gcs.go new file mode 100644 index 00000000..8d1e6879 --- /dev/null +++ b/pkg/asset/asset_gcs.go @@ -0,0 +1,144 @@ +package asset + +import ( + "context" + "errors" + "path" + "strings" + + "cloud.google.com/go/storage" + "github.com/readium/go-toolkit/pkg/archive" + "github.com/readium/go-toolkit/pkg/fetcher" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" + "google.golang.org/api/iterator" +) + +// Represents a publication stored in a Google Cloud Storage bucket. +type GCSAsset struct { + uri url.AbsoluteURL + client *storage.Client + + mediatype *mediatype.MediaType + knownMediaType *mediatype.MediaType + + isDir *bool + attrs *storage.ObjectAttrs +} + +func GCS(client *storage.Client, uri url.AbsoluteURL) *GCSAsset { + return &GCSAsset{ + client: client, + uri: uri, + } +} + +// Creates a [GCSAsset] from a storage client, an absolute URL and an optional media type, when known. 
+func GCSWithMediaType(client *storage.Client, uri url.AbsoluteURL, mediatype *mediatype.MediaType) *GCSAsset { + return &GCSAsset{ + client: client, + uri: uri, + knownMediaType: mediatype, + } +} + +// Name implements PublicationAsset +func (a *GCSAsset) Name() string { + return path.Base(a.uri.Path()) +} + +func (a *GCSAsset) handle() (*storage.ObjectHandle, error) { + return a.uri.ToGSObject(a.client) +} + +func (a *GCSAsset) head(ctx context.Context) error { + if a.attrs != nil { + return nil + } + handle, err := a.handle() + if err != nil { + return err + } + a.attrs, err = handle.Attrs(ctx) + return err +} + +// MediaType implements PublicationAsset +func (a *GCSAsset) MediaType(ctx context.Context) mediatype.MediaType { + if a.mediatype == nil { + if a.knownMediaType != nil { + a.mediatype = a.knownMediaType + } else { + if err := a.head(ctx); err == nil { + // Note how we are *not* using the file contents to sniff the media type. + // We want to avoid unnecessary requests at all costs. 
+ if a.attrs.ContentType != "" { + a.mediatype = mediatype.OfStringAndExtension(a.attrs.ContentType, a.uri.Extension()) + } else { + a.mediatype = mediatype.OfExtension(a.uri.Extension()) + } + } + } + if a.mediatype == nil { // Still nothing found + a.mediatype = &mediatype.Binary + } + } + return *a.mediatype +} + +// CreateFetcher implements PublicationAsset +func (a *GCSAsset) CreateFetcher(ctx context.Context, dependencies Dependencies, credentials string) (fetcher.Fetcher, error) { + handle, err := a.handle() + if err != nil { + return nil, err + } + + var isDir bool + if a.isDir != nil { + isDir = *a.isDir + } else { + if strings.HasSuffix(handle.ObjectName(), "/") { + // Path ends in a slash, so it's a folder + isDir = true + } else { + // Not sure if it's a folder or a file, need to check + it := a.client.Bucket(handle.BucketName()).Objects(ctx, &storage.Query{ + Prefix: handle.ObjectName() + "/", + Delimiter: "/", + }) + _, err := it.Next() + if err == nil { + // Found a file with the same prefix, so it's a folder + isDir = true + } else if err != iterator.Done { + // Something else than EOF + return nil, err + } + } + a.isDir = &isDir + } + + if isDir || !a.MediaType(ctx).IsZIP() { + base := "" + if !isDir { + // There's some problem checking for the file's existance + if err = a.head(ctx); err != nil { + return nil, err + } + + base = a.Name() + } + return fetcher.NewGCSFetcher(base, a.client, handle), nil + } else { + factory, ok := dependencies.ArchiveFactory.(archive.SchemeSpecificArchiveFactory) + if !ok { + // It's not possible to determine if the factory actually supports archives on GCS + return nil, errors.New("provided ArchiveFactory does not implement SchemeSpecificArchiveFactory") + } + if !factory.CanOpen(url.SchemeGS) { + return nil, errors.New("provided ArchiveFactory does not support GS scheme") + } + + return fetcher.NewArchiveFetcherFromURLWithFactoryAndContext(ctx, a.uri, factory) + } +} diff --git a/pkg/asset/asset_http.go 
b/pkg/asset/asset_http.go new file mode 100644 index 00000000..cbf06135 --- /dev/null +++ b/pkg/asset/asset_http.go @@ -0,0 +1,144 @@ +package asset + +import ( + "context" + "net/http" + "path" + "slices" + "strings" + + "github.com/pkg/errors" + "github.com/readium/go-toolkit/pkg/archive" + "github.com/readium/go-toolkit/pkg/fetcher" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" +) + +// Represents a publication stored on an Amazon S3-compatible remote server. +type HTTPAsset struct { + url url.AbsoluteURL + client *http.Client + + mediatype *mediatype.MediaType + knownMediaType *mediatype.MediaType + + fileSize int64 + contentType string +} + +func HTTP(client *http.Client, url url.AbsoluteURL) *HTTPAsset { + return &HTTPAsset{ + client: client, + url: url, + } +} + +// Creates a [HTTPAsset] from a [File] and an optional media type, when known. +func HTTPWithMediaType(client *http.Client, url url.AbsoluteURL, mediatype *mediatype.MediaType) *HTTPAsset { + return &HTTPAsset{ + client: client, + url: url, + knownMediaType: mediatype, + } +} + +// Name implements PublicationAsset +func (a *HTTPAsset) Name() string { + return path.Base(a.url.Path()) +} + +func (a *HTTPAsset) head(ctx context.Context) error { + if a.fileSize > 0 { + return nil + } + + req, err := http.NewRequestWithContext(ctx, http.MethodHead, a.url.String(), nil) + if err != nil { + return err + } + resp, err := a.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + // If it's not code 200, the file doesn't exist + if resp.StatusCode != http.StatusOK { + return errors.Errorf("HTTP HEAD request failed with status code: %d", resp.StatusCode) + } + + // HTTP server *must* support byte range requests + arvs := resp.Header.Values("Accept-Ranges") + if !slices.Contains(arvs, "bytes") { + return errors.New("HTTP server does not support byte range requests") + } + + // HTTP server *must* return Content-Length header + if 
resp.ContentLength <= 0 { + return errors.New("HTTP server returned zero content length") + } + a.fileSize = resp.ContentLength + + // A good server will response with the correct content type for the file + contentType := resp.Header.Get("Content-Type") + if contentType != "application/octet-stream" { + a.contentType = contentType + } + + return nil +} + +// MediaType implements PublicationAsset +func (a *HTTPAsset) MediaType(ctx context.Context) mediatype.MediaType { + if a.mediatype == nil { + if a.knownMediaType != nil { + a.mediatype = a.knownMediaType + } else { + if err := a.head(ctx); err == nil { + // Note how we are *not* using the file contents to sniff the media type. + // We want to avoid unecessary requests at all costs. + if a.contentType != "" { + a.mediatype = mediatype.OfStringAndExtension(a.contentType, a.url.Extension()) + } else { + a.mediatype = mediatype.OfExtension(a.url.Extension()) + } + } + } + if a.mediatype == nil { // Still nothing found + a.mediatype = &mediatype.Binary + } + } + return *a.mediatype +} + +// CreateFetcher implements PublicationAsset +func (a *HTTPAsset) CreateFetcher(ctx context.Context, dependencies Dependencies, credentials string) (fetcher.Fetcher, error) { + // We can't determine if the provided path is a directory or not unless it ends in a "/" + // because we can't expect HTTP servers to be listing directory indexes, and even then we + // couldn't distinguish between a directory listing and a file. So no "/" is always a file. 
+ isDir := strings.HasSuffix(a.url.Path(), "/") + + if isDir || !a.MediaType(ctx).IsZIP() { + base := "" + if !isDir { + // There's some problem checking for the file's existance + if err := a.head(ctx); err != nil { + return nil, err + } + + base = a.Name() + } + return fetcher.NewHTTPFetcher(base, a.client, a.url), nil + } else { + factory, ok := dependencies.ArchiveFactory.(archive.SchemeSpecificArchiveFactory) + if !ok { + // It's not possible to determine if the factory actually supports archives through HTTP + return nil, errors.New("provided ArchiveFactory does not implement SchemeSpecificArchiveFactory") + } + if !factory.CanOpen(url.SchemeHTTP) && !factory.CanOpen(url.SchemeHTTPS) { + return nil, errors.New("provided ArchiveFactory does not support HTTP or HTTPS scheme") + } + + return fetcher.NewArchiveFetcherFromURLWithFactoryAndContext(ctx, a.url, factory) + } +} diff --git a/pkg/asset/asset_publication.go b/pkg/asset/asset_publication.go index 808ed22e..8b1df0af 100644 --- a/pkg/asset/asset_publication.go +++ b/pkg/asset/asset_publication.go @@ -1,6 +1,8 @@ package asset import ( + "context" + "github.com/readium/go-toolkit/pkg/archive" "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/mediatype" @@ -12,7 +14,7 @@ type Dependencies struct { // Represents a digital medium (e.g. a file) offering access to a publication. type PublicationAsset interface { - Name() string // Name of the asset, e.g. a filename. - MediaType() mediatype.MediaType // Media type of the asset. If unknown, fallback on `MediaType.Binary`. - CreateFetcher(dependencies Dependencies, credentials string) (fetcher.Fetcher, error) // Creates a fetcher used to access the asset's content. + Name() string // Name of the asset, e.g. a filename. + MediaType(ctx context.Context) mediatype.MediaType // Media type of the asset. If unknown, fallback on `MediaType.Binary`. 
+ CreateFetcher(ctx context.Context, dependencies Dependencies, credentials string) (fetcher.Fetcher, error) // Creates a fetcher used to access the asset's content. } diff --git a/pkg/asset/asset_s3.go b/pkg/asset/asset_s3.go new file mode 100644 index 00000000..f449712c --- /dev/null +++ b/pkg/asset/asset_s3.go @@ -0,0 +1,152 @@ +package asset + +import ( + "context" + "errors" + "path" + "strings" + + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/readium/go-toolkit/pkg/archive" + "github.com/readium/go-toolkit/pkg/fetcher" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" +) + +// Represents a publication stored on an Amazon S3-compatible remote server. +type S3Asset struct { + uri url.AbsoluteURL + client *s3.Client + + mediatype *mediatype.MediaType + knownMediaType *mediatype.MediaType + + isDir *bool + headData *s3.HeadObjectOutput +} + +func S3(client *s3.Client, uri url.AbsoluteURL) *S3Asset { + return &S3Asset{ + client: client, + uri: uri, + } +} + +// Creates a [S3Asset] from a [File] and an optional media type, when known. 
+func S3WithMediaType(client *s3.Client, uri url.AbsoluteURL, mediatype *mediatype.MediaType) *S3Asset { + return &S3Asset{ + client: client, + uri: uri, + knownMediaType: mediatype, + } +} + +// Name implements PublicationAsset +func (a *S3Asset) Name() string { + return path.Base(a.uri.Path()) +} + +func (a *S3Asset) object() (*s3.GetObjectInput, error) { + return a.uri.ToS3Object() +} + +func (a *S3Asset) head(ctx context.Context) error { + if a.headData != nil { + return nil + } + obj, err := a.object() + if err != nil { + return err + } + output, err := a.client.HeadObject(ctx, &s3.HeadObjectInput{ + Bucket: obj.Bucket, + Key: obj.Key, + }) + if err != nil { + return err + } + a.headData = output + return nil +} + +// MediaType implements PublicationAsset +func (a *S3Asset) MediaType(ctx context.Context) mediatype.MediaType { + if a.mediatype == nil { + if a.knownMediaType != nil { + a.mediatype = a.knownMediaType + } else { + if err := a.head(ctx); err == nil { + // Note how we are *not* using the file contents to sniff the media type. + // We want to avoid unecessary requests at all costs. 
+ if a.headData.ContentType != nil { + a.mediatype = mediatype.OfStringAndExtension(*a.headData.ContentType, a.uri.Extension()) + } else { + a.mediatype = mediatype.OfExtension(a.uri.Extension()) + } + } + } + if a.mediatype == nil { // Still nothing found + a.mediatype = &mediatype.Binary + } + } + return *a.mediatype +} + +// CreateFetcher implements PublicationAsset +func (a *S3Asset) CreateFetcher(ctx context.Context, dependencies Dependencies, credentials string) (fetcher.Fetcher, error) { + obj, err := a.object() + if err != nil { + return nil, err + } + + var isDir bool + if a.isDir != nil { + isDir = *a.isDir + } else { + if strings.HasSuffix(*obj.Key, "/") { + // Path ends in a slash, so it's a folder + isDir = true + } else { + // Not sure if it's a folder or a file, need to check + prefix := *obj.Key + "/" + max := int32(1) + out, err := a.client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{ + Bucket: obj.Bucket, + Prefix: &prefix, + MaxKeys: &max, + }) + if err != nil { + return nil, err + } + if len(out.Contents) > 0 { + isDir = true + } + } + a.isDir = &isDir + } + + if isDir || !a.MediaType(ctx).IsZIP() { + base := "" + if !isDir { + // There's some problem checking for the file's existance + if err = a.head(ctx); err != nil { + return nil, err + } + + base = a.Name() + } + return fetcher.NewS3Fetcher(base, a.client, *obj.Bucket, *obj.Key), nil + } else { + factory, ok := dependencies.ArchiveFactory.(archive.SchemeSpecificArchiveFactory) + if !ok { + // It's not possible to determine if the factory actually supports archives on S3 + return nil, errors.New("provided ArchiveFactory does not implement SchemeSpecificArchiveFactory") + } + if !factory.CanOpen(url.SchemeS3) { + return nil, errors.New("provided ArchiveFactory does not support S3 scheme") + } + + return fetcher.NewArchiveFetcherFromURLWithFactoryAndContext(ctx, a.uri, factory) + } + +} diff --git a/pkg/content/content.go b/pkg/content/content.go index ffed15f4..31e0b664 100644 --- 
a/pkg/content/content.go +++ b/pkg/content/content.go @@ -1,6 +1,7 @@ package content import ( + "context" "strings" "github.com/readium/go-toolkit/pkg/content/element" @@ -8,19 +9,19 @@ import ( ) type Content interface { - Text(separator *string) (string, error) // Extracts the full raw text, or returns null if no text content can be found. - Iterator() iterator.Iterator // Creates a new iterator for this content. - Elements() ([]element.Element, error) // Returns all the elements as a list. + Text(ctx context.Context, separator *string) (string, error) // Extracts the full raw text, or returns null if no text content can be found. + Iterator() iterator.Iterator // Creates a new iterator for this content. + Elements(ctx context.Context) ([]element.Element, error) // Returns all the elements as a list. } // Extracts the full raw text, or returns null if no text content can be found. -func ContentText(content Content, separator *string) (string, error) { +func ContentText(ctx context.Context, content Content, separator *string) (string, error) { sep := "\n" if separator != nil { sep = *separator } var sb strings.Builder - els, err := content.Elements() + els, err := content.Elements(ctx) if err != nil { return "", err } @@ -36,11 +37,11 @@ func ContentText(content Content, separator *string) (string, error) { return strings.TrimSuffix(sb.String(), sep), nil } -func ContentElements(content Content) ([]element.Element, error) { +func ContentElements(ctx context.Context, content Content) ([]element.Element, error) { var elements []element.Element it := content.Iterator() for { - hasNext, err := it.HasNext() + hasNext, err := it.HasNext(ctx) if err != nil { return nil, err } diff --git a/pkg/content/iterator/html.go b/pkg/content/iterator/html.go index 98c13750..e38a1b1e 100644 --- a/pkg/content/iterator/html.go +++ b/pkg/content/iterator/html.go @@ -1,6 +1,7 @@ package iterator import ( + "context" "strings" "github.com/andybalholm/cascadia" @@ -43,12 +44,12 @@ func 
HTMLFactory() ResourceContentIteratorFactory { } } -func (it *HTMLContentIterator) HasPrevious() (bool, error) { +func (it *HTMLContentIterator) HasPrevious(ctx context.Context) (bool, error) { if it.currentElement != nil && it.currentElement.Delta == -1 { return true, nil } - elements, err := it.elements() + elements, err := it.elements(ctx) if err != nil { return false, err } @@ -79,12 +80,12 @@ func (it *HTMLContentIterator) Previous() element.Element { return el } -func (it *HTMLContentIterator) HasNext() (bool, error) { +func (it *HTMLContentIterator) HasNext(ctx context.Context) (bool, error) { if it.currentElement != nil && it.currentElement.Delta == 1 { return true, nil } - elements, err := it.elements() + elements, err := it.elements(ctx) if err != nil { return false, err } @@ -115,9 +116,9 @@ func (it *HTMLContentIterator) Next() element.Element { return el } -func (it *HTMLContentIterator) elements() (*ParsedElements, error) { +func (it *HTMLContentIterator) elements(ctx context.Context) (*ParsedElements, error) { if it.parsedElements == nil { - elements, err := it.parseElements() + elements, err := it.parseElements(ctx) if err != nil { return nil, err } @@ -126,8 +127,8 @@ func (it *HTMLContentIterator) elements() (*ParsedElements, error) { return it.parsedElements, nil } -func (it *HTMLContentIterator) parseElements() (*ParsedElements, error) { - raw, rerr := it.resource.ReadAsString() +func (it *HTMLContentIterator) parseElements(ctx context.Context) (*ParsedElements, error) { + raw, rerr := fetcher.ReadResourceAsString(ctx, it.resource) if rerr != nil { return nil, errors.Wrap(rerr, "failed reading HTML string of "+it.resource.Link().Href.String()) } diff --git a/pkg/content/iterator/html_converter.go b/pkg/content/iterator/html_converter.go index 73662b0e..a3225582 100644 --- a/pkg/content/iterator/html_converter.go +++ b/pkg/content/iterator/html_converter.go @@ -493,9 +493,7 @@ func (c *HTMLConverter) flushText() { bestRole = 
element.Heading{Level: 5} case atom.H6: bestRole = element.Heading{Level: 6} - case atom.Blockquote: - fallthrough - case atom.Q: + case atom.Blockquote, atom.Q: quote := element.Quote{} for _, at := range el.Attr { if at.Key == "cite" { diff --git a/pkg/content/iterator/iterator.go b/pkg/content/iterator/iterator.go index 0f1eb6fd..64ec1d4f 100644 --- a/pkg/content/iterator/iterator.go +++ b/pkg/content/iterator/iterator.go @@ -1,20 +1,24 @@ package iterator -import "github.com/readium/go-toolkit/pkg/content/element" +import ( + "context" + + "github.com/readium/go-toolkit/pkg/content/element" +) // Iterates through a list of [Element] items asynchronously. // [hasNext] and [hasPrevious] refer to the last element computed by a previous call to any of both methods. // TODO: It's based on a kotlin iterator, maybe we can make this more of something for go? type Iterator interface { - HasNext() (bool, error) // Returns true if the iterator has a next element - Next() element.Element // Retrieves the element computed by a preceding call to [hasNext]. Panics if [hasNext] was not invoked. - HasPrevious() (bool, error) // Returns true if the iterator has a previous element - Previous() element.Element // Retrieves the element computed by a preceding call to [hasPrevious]. Panics if [hasNext] was not invoked. + HasNext(ctx context.Context) (bool, error) // Returns true if the iterator has a next element + Next() element.Element // Retrieves the element computed by a preceding call to [hasNext]. Panics if [hasNext] was not invoked. + HasPrevious(ctx context.Context) (bool, error) // Returns true if the iterator has a previous element + Previous() element.Element // Retrieves the element computed by a preceding call to [hasPrevious]. Panics if [hasNext] was not invoked. } // Moves to the next item and returns it, or nil if we reached the end. 
-func ItNextOrNil(it Iterator) (element.Element, error) { - b, err := it.HasNext() +func ItNextOrNil(ctx context.Context, it Iterator) (element.Element, error) { + b, err := it.HasNext(ctx) if err != nil { return nil, err } @@ -25,8 +29,8 @@ func ItNextOrNil(it Iterator) (element.Element, error) { } // Moves to the previous item and returns it, or nil if we reached the beginning. -func ItPreviousOrNil(it Iterator) (element.Element, error) { - b, err := it.HasPrevious() +func ItPreviousOrNil(ctx context.Context, it Iterator) (element.Element, error) { + b, err := it.HasPrevious(ctx) if err != nil { return nil, err } @@ -42,11 +46,11 @@ type IndexedIterator struct { iterator Iterator } -func (it *IndexedIterator) NextContentIn(direction Direction) (element.Element, error) { +func (it *IndexedIterator) NextContentIn(ctx context.Context, direction Direction) (element.Element, error) { if direction == Foward { - return ItNextOrNil(it.iterator) + return ItNextOrNil(ctx, it.iterator) } else { - return ItPreviousOrNil(it.iterator) + return ItPreviousOrNil(ctx, it.iterator) } } diff --git a/pkg/content/iterator/publication.go b/pkg/content/iterator/publication.go index 035bc277..96a9ef37 100644 --- a/pkg/content/iterator/publication.go +++ b/pkg/content/iterator/publication.go @@ -1,6 +1,8 @@ package iterator import ( + "context" + "github.com/readium/go-toolkit/pkg/content/element" "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" @@ -28,8 +30,8 @@ func NewPublicationContent(manifest manifest.Manifest, fetcher fetcher.Fetcher, } } -func (it *PublicationContentIterator) HasPrevious() (bool, error) { - e, err := it.nextIn(Backward) +func (it *PublicationContentIterator) HasPrevious(ctx context.Context) (bool, error) { + e, err := it.nextIn(ctx, Backward) if err != nil { return false, err } @@ -44,8 +46,8 @@ func (it *PublicationContentIterator) Previous() element.Element { return it.currentElement.El } -func (it 
*PublicationContentIterator) HasNext() (bool, error) { - e, err := it.nextIn(Foward) +func (it *PublicationContentIterator) HasNext(ctx context.Context) (bool, error) { + e, err := it.nextIn(ctx, Foward) if err != nil { return false, err } @@ -60,20 +62,20 @@ func (it *PublicationContentIterator) Next() element.Element { return it.currentElement.El } -func (it *PublicationContentIterator) nextIn(direction Direction) (*ElementInDirection, error) { - iterator := it.currentIterator() +func (it *PublicationContentIterator) nextIn(ctx context.Context, direction Direction) (*ElementInDirection, error) { + iterator := it.currentIterator(ctx) if iterator == nil { return nil, nil } - content, err := iterator.NextContentIn(direction) + content, err := iterator.NextContentIn(ctx, direction) if err != nil { return nil, err } if content == nil { - if ni := it.nextIteratorIn(direction, iterator.index); ni != nil { + if ni := it.nextIteratorIn(ctx, direction, iterator.index); ni != nil { it._currentIterator = ni - return it.nextIn(direction) + return it.nextIn(ctx, direction) } return nil, nil } @@ -84,34 +86,34 @@ func (it *PublicationContentIterator) nextIn(direction Direction) (*ElementInDir } // Returns the [Iterator] for the current [Resource] in the reading order. -func (it *PublicationContentIterator) currentIterator() *IndexedIterator { +func (it *PublicationContentIterator) currentIterator(ctx context.Context) *IndexedIterator { if it._currentIterator == nil { - it._currentIterator = it.initialIterator() + it._currentIterator = it.initialIterator(ctx) } return it._currentIterator } // Returns the first iterator starting at [startLocator] or the beginning of the publication. 
-func (it *PublicationContentIterator) initialIterator() *IndexedIterator { +func (it *PublicationContentIterator) initialIterator(ctx context.Context) *IndexedIterator { var index int var ii *IndexedIterator if it.startLocator != nil { if i := it.manifest.ReadingOrder.IndexOfFirstWithHref(it.startLocator.Href); i > 0 { index = i } - ii = it.loadIteratorAt(index, *it.startLocator) + ii = it.loadIteratorAt(ctx, index, *it.startLocator) } else { - ii = it.loadIteratorAtProgression(index, 0) + ii = it.loadIteratorAtProgression(ctx, index, 0) } if ii == nil { - return it.nextIteratorIn(Foward, index) + return it.nextIteratorIn(ctx, Foward, index) } return ii } // Returns the next resource iterator in the given [direction], starting from [fromIndex] -func (it *PublicationContentIterator) nextIteratorIn(direction Direction, fromIndex int) *IndexedIterator { +func (it *PublicationContentIterator) nextIteratorIn(ctx context.Context, direction Direction, fromIndex int) *IndexedIterator { index := fromIndex + direction.Delta() if index < 0 || index >= len(it.manifest.ReadingOrder) { return nil @@ -122,17 +124,17 @@ func (it *PublicationContentIterator) nextIteratorIn(direction Direction, fromIn progression = 1 } - if it := it.loadIteratorAtProgression(index, progression); it != nil { + if it := it.loadIteratorAtProgression(ctx, index, progression); it != nil { return it } - return it.nextIteratorIn(direction, index) + return it.nextIteratorIn(ctx, direction, index) } // Loads the iterator at the given [index] in the reading order. // The [locator] will be used to compute the starting [Locator] for the iterator. 
-func (it *PublicationContentIterator) loadIteratorAt(index int, locator manifest.Locator) *IndexedIterator { +func (it *PublicationContentIterator) loadIteratorAt(ctx context.Context, index int, locator manifest.Locator) *IndexedIterator { link := it.manifest.ReadingOrder[index] - resource := it.fetcher.Get(link) + resource := it.fetcher.Get(ctx, link) for _, factory := range it.resourceContentIteratorFactories { res := factory(resource, locator) @@ -145,12 +147,12 @@ func (it *PublicationContentIterator) loadIteratorAt(index int, locator manifest // Loads the iterator at the given [index] in the reading order. // The [progression] will be used to build a locator and call [loadIteratorAt]. -func (it *PublicationContentIterator) loadIteratorAtProgression(index int, progression float64) *IndexedIterator { +func (it *PublicationContentIterator) loadIteratorAtProgression(ctx context.Context, index int, progression float64) *IndexedIterator { link := it.manifest.ReadingOrder[index] locator := it.manifest.LocatorFromLink(link) if locator == nil { return nil } locator.Locations.Progression = &progression - return it.loadIteratorAt(index, *locator) + return it.loadIteratorAt(ctx, index, *locator) } diff --git a/pkg/fetcher/fetcher.go b/pkg/fetcher/fetcher.go index 3aa43671..0316de7c 100644 --- a/pkg/fetcher/fetcher.go +++ b/pkg/fetcher/fetcher.go @@ -1,6 +1,10 @@ package fetcher -import "github.com/readium/go-toolkit/pkg/manifest" +import ( + "context" + + "github.com/readium/go-toolkit/pkg/manifest" +) // Fetcher provides access to a Resource from a Link. type Fetcher interface { @@ -13,7 +17,7 @@ type Fetcher interface { * If the medium has an inherent resource order, it should be followed. * Otherwise, HREFs are sorted alphabetically. */ - Links() (manifest.LinkList, error) + Links(ctx context.Context) (manifest.LinkList, error) /** * Returns the [Resource] at the given [link]'s HREF. 
@@ -21,7 +25,7 @@ type Fetcher interface { * A [Resource] is always returned, since for some cases we can't know if it exists before * actually fetching it, such as HTTP. Therefore, errors are handled at the Resource level. */ - Get(link manifest.Link) Resource + Get(ctx context.Context, link manifest.Link) Resource // Closes this object and releases any resources associated with it. // If the object is already closed then invoking this method has no effect. @@ -31,11 +35,11 @@ type Fetcher interface { // A [Fetcher] providing no resources at all. type EmptyFetcher struct{} -func (f EmptyFetcher) Links() (manifest.LinkList, error) { +func (f EmptyFetcher) Links(ctx context.Context) (manifest.LinkList, error) { return manifest.LinkList{}, nil } -func (f EmptyFetcher) Get(link manifest.Link) Resource { +func (f EmptyFetcher) Get(ctx context.Context, link manifest.Link) Resource { return NewFailureResource(link, NotFound(nil)) } diff --git a/pkg/fetcher/fetcher_archive.go b/pkg/fetcher/fetcher_archive.go index cee368a6..31827a01 100644 --- a/pkg/fetcher/fetcher_archive.go +++ b/pkg/fetcher/fetcher_archive.go @@ -1,6 +1,7 @@ package fetcher import ( + "context" "errors" "io" "path" @@ -8,7 +9,7 @@ import ( "github.com/readium/go-toolkit/pkg/archive" "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/mediatype" - "github.com/readium/xmlquery" + "github.com/readium/go-toolkit/pkg/util/url" ) // Provides access to entries of an archive. 
@@ -17,7 +18,7 @@ type ArchiveFetcher struct { } // Links implements Fetcher -func (f *ArchiveFetcher) Links() (manifest.LinkList, error) { +func (f *ArchiveFetcher) Links(ctx context.Context) (manifest.LinkList, error) { entries := f.archive.Entries() links := make(manifest.LinkList, 0, len(entries)) for _, af := range entries { @@ -42,7 +43,7 @@ func (f *ArchiveFetcher) Links() (manifest.LinkList, error) { } // Get implements Fetcher -func (f *ArchiveFetcher) Get(link manifest.Link) Resource { +func (f *ArchiveFetcher) Get(ctx context.Context, link manifest.Link) Resource { entry, err := f.archive.Entry(link.Href.String()) if err != nil { return NewFailureResource(link, NotFound(err)) @@ -79,12 +80,43 @@ func NewArchiveFetcher(a archive.Archive) *ArchiveFetcher { } } -func NewArchiveFetcherFromPath(filepath string) (*ArchiveFetcher, error) { - return NewArchiveFetcherFromPathWithFactory(filepath, archive.NewArchiveFactory()) +func NewArchiveFetcherFromPath(ctx context.Context, path string) (*ArchiveFetcher, error) { + return NewArchiveFetcherFromPathWithFactory(ctx, path, archive.NewArchiveFactory()) } -func NewArchiveFetcherFromPathWithFactory(path string, factory archive.ArchiveFactory) (*ArchiveFetcher, error) { - a, err := factory.Open(path, "") // TODO password +func NewArchiveFetcherFromPathWithFactory(ctx context.Context, path string, factory archive.ArchiveFactory) (*ArchiveFetcher, error) { + pth, err := url.FromFilepath(path) + if err != nil { + return nil, err + } + + a, err := factory.Open(ctx, pth, "") + if err != nil { + return nil, err + } + return &ArchiveFetcher{ + archive: a, + }, nil +} + +func NewArchiveFetcherFromURLWithFactory(ctx context.Context, url url.URL, factory archive.ArchiveFactory) (*ArchiveFetcher, error) { + a, err := factory.Open(ctx, url, "") + if err != nil { + return nil, err + } + return &ArchiveFetcher{ + archive: a, + }, nil +} + +func NewArchiveFetcherFromURLWithFactoryAndContext(ctx context.Context, url url.URL, factory 
archive.SchemeSpecificArchiveFactory) (*ArchiveFetcher, error) { + var a archive.Archive + var err error + if f, ok := factory.(archive.ArchiveFactory); ok { + a, err = f.Open(ctx, url, "") + } else { + return nil, errors.New("factory does not implement ArchiveFactory") + } if err != nil { return nil, err } @@ -121,7 +153,7 @@ func (r *entryResource) Properties() manifest.Properties { } // Read implements Resource -func (r *entryResource) Read(start int64, end int64) ([]byte, *ResourceError) { +func (r *entryResource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) { data, err := r.entry.Read(start, end) if err == nil { return data, nil @@ -137,7 +169,7 @@ func (r *entryResource) Read(start int64, end int64) ([]byte, *ResourceError) { } // Stream implements Resource -func (r *entryResource) Stream(w io.Writer, start int64, end int64) (int64, *ResourceError) { +func (r *entryResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) { n, err := r.entry.Stream(w, start, end) if err == nil { return n, nil @@ -158,12 +190,12 @@ func (r *entryResource) CompressedAs(compressionMethod archive.CompressionMethod } // CompressedLength implements CompressedResource -func (r *entryResource) CompressedLength() int64 { +func (r *entryResource) CompressedLength(ctx context.Context) int64 { return int64(r.entry.CompressedLength()) } // StreamCompressed implements CompressedResource -func (r *entryResource) StreamCompressed(w io.Writer) (int64, *ResourceError) { +func (r *entryResource) StreamCompressed(ctx context.Context, w io.Writer) (int64, *ResourceError) { i, err := r.entry.StreamCompressed(w) if err == nil { return i, nil @@ -172,7 +204,7 @@ func (r *entryResource) StreamCompressed(w io.Writer) (int64, *ResourceError) { } // StreamCompressedGzip implements CompressedResource -func (r *entryResource) StreamCompressedGzip(w io.Writer) (int64, *ResourceError) { +func (r *entryResource) StreamCompressedGzip(ctx 
context.Context, w io.Writer) (int64, *ResourceError) { i, err := r.entry.StreamCompressedGzip(w) if err == nil { return i, nil @@ -181,7 +213,7 @@ func (r *entryResource) StreamCompressedGzip(w io.Writer) (int64, *ResourceError } // ReadCompressed implements CompressedResource -func (r *entryResource) ReadCompressed() ([]byte, *ResourceError) { +func (r *entryResource) ReadCompressed(ctx context.Context) ([]byte, *ResourceError) { i, err := r.entry.ReadCompressed() if err == nil { return i, nil @@ -190,7 +222,7 @@ func (r *entryResource) ReadCompressed() ([]byte, *ResourceError) { } // ReadCompressedGzip implements CompressedResource -func (r *entryResource) ReadCompressedGzip() ([]byte, *ResourceError) { +func (r *entryResource) ReadCompressedGzip(ctx context.Context) ([]byte, *ResourceError) { i, err := r.entry.ReadCompressedGzip() if err == nil { return i, nil @@ -199,21 +231,6 @@ func (r *entryResource) ReadCompressedGzip() ([]byte, *ResourceError) { } // Length implements Resource -func (r *entryResource) Length() (int64, *ResourceError) { +func (r *entryResource) Length(ctx context.Context) (int64, *ResourceError) { return int64(r.entry.Length()), nil } - -// ReadAsString implements Resource -func (r *entryResource) ReadAsString() (string, *ResourceError) { // TODO determine how charset is needed - return ReadResourceAsString(r) -} - -// ReadAsJSON implements Resource -func (r *entryResource) ReadAsJSON() (map[string]interface{}, *ResourceError) { - return ReadResourceAsJSON(r) -} - -// ReadAsXML implements Resource -func (r *entryResource) ReadAsXML(prefixes map[string]string) (*xmlquery.Node, *ResourceError) { - return ReadResourceAsXML(r, prefixes) -} diff --git a/pkg/fetcher/fetcher_archive_test.go b/pkg/fetcher/fetcher_archive_test.go index 5e00aa51..92009a71 100644 --- a/pkg/fetcher/fetcher_archive_test.go +++ b/pkg/fetcher/fetcher_archive_test.go @@ -10,7 +10,7 @@ import ( ) func withArchiveFetcher(t *testing.T, callback func(a *ArchiveFetcher)) { - 
a, err := NewArchiveFetcherFromPath("./testdata/epub.epub") + a, err := NewArchiveFetcherFromPath(t.Context(), "./testdata/epub.epub") assert.NoError(t, err) callback(a) } @@ -54,12 +54,12 @@ func TestArchiveFetcherLinks(t *testing.T) { } withArchiveFetcher(t, func(a *ArchiveFetcher) { - links, err := a.Links() + links, err := a.Links(t.Context()) assert.Nil(t, err) mustLinks := make([]manifest.Link, len(mustContain)) for i, l := range mustContain { - assert.Equal(t, l.Properties, a.Get(l.Link).Properties()) + assert.Equal(t, l.Properties, a.Get(t.Context(), l.Link).Properties()) mustLinks[i] = l.Link } assert.ElementsMatch(t, mustLinks, links) @@ -68,31 +68,31 @@ func TestArchiveFetcherLinks(t *testing.T) { func TestArchiveFetcherLengthNotFound(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) - _, err := resource.Length() + resource := a.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) + _, err := resource.Length(t.Context()) assert.Equal(t, NotFound(err.Cause), err) }) } func TestArchiveFetcherReadNotFound(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) - _, err := resource.Read(0, 0) + resource := a.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) + _, err := resource.Read(t.Context(), 0, 0) assert.Equal(t, NotFound(err.Cause), err) - _, err = resource.Stream(&bytes.Buffer{}, 0, 0) + _, err = resource.Stream(t.Context(), &bytes.Buffer{}, 0, 0) assert.Equal(t, NotFound(err.Cause), err) }) } func TestArchiveFetcherRead(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("mimetype", false)}) - bin, err := resource.Read(0, 0) + resource := a.Get(t.Context(), manifest.Link{Href: 
manifest.MustNewHREFFromString("mimetype", false)}) + bin, err := resource.Read(t.Context(), 0, 0) if assert.Nil(t, err) { assert.Equal(t, "application/epub+zip", string(bin)) } var b bytes.Buffer - n, err := resource.Stream(&b, 0, 0) + n, err := resource.Stream(t.Context(), &b, 0, 0) if assert.Nil(t, err) { assert.EqualValues(t, 20, n) assert.Equal(t, "application/epub+zip", b.String()) @@ -102,13 +102,13 @@ func TestArchiveFetcherRead(t *testing.T) { func TestArchiveFetcherReadRange(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("mimetype", false)}) - bin, err := resource.Read(0, 10) + resource := a.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("mimetype", false)}) + bin, err := resource.Read(t.Context(), 0, 10) if assert.Nil(t, err) { assert.Equal(t, "application", string(bin)) } var b bytes.Buffer - n, err := resource.Stream(&b, 0, 10) + n, err := resource.Stream(t.Context(), &b, 0, 10) if assert.Nil(t, err) { assert.EqualValues(t, 11, n) assert.Equal(t, "application", b.String()) @@ -118,8 +118,8 @@ func TestArchiveFetcherReadRange(t *testing.T) { func TestArchiveFetcherComputingLength(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("mimetype", false)}) - length, err := resource.Length() + resource := a.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("mimetype", false)}) + length, err := resource.Length(t.Context()) assert.Nil(t, err) assert.EqualValues(t, 20, length) }) @@ -127,23 +127,23 @@ func TestArchiveFetcherComputingLength(t *testing.T) { func TestArchiveFetcherDirectoryLengthNotFound(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("EPUB", false)}) - _, err := resource.Length() + resource := a.Get(t.Context(), manifest.Link{Href: 
manifest.MustNewHREFFromString("EPUB", false)}) + _, err := resource.Length(t.Context()) assert.Equal(t, NotFound(err.Cause), err) }) } func TestArchiveFetcherFileNotFoundLength(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) - _, err := resource.Length() + resource := a.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) + _, err := resource.Length(t.Context()) assert.Equal(t, NotFound(err.Cause), err) }) } func TestArchiveFetcherAddsProperties(t *testing.T) { withArchiveFetcher(t, func(a *ArchiveFetcher) { - resource := a.Get(manifest.Link{Href: manifest.MustNewHREFFromString("EPUB/css/epub.css", false)}) + resource := a.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("EPUB/css/epub.css", false)}) assert.Equal(t, manifest.Properties{ "https://readium.org/webpub-manifest/properties#archive": map[string]interface{}{ "entryLength": uint64(595), diff --git a/pkg/fetcher/fetcher_file.go b/pkg/fetcher/fetcher_file.go index 54c93dbd..bae53a58 100644 --- a/pkg/fetcher/fetcher_file.go +++ b/pkg/fetcher/fetcher_file.go @@ -1,26 +1,28 @@ package fetcher import ( + "context" "errors" "io" "io/fs" "os" "path/filepath" + "runtime" "strings" + "weak" "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/mediatype" - "github.com/readium/xmlquery" ) // Provides access to resources on the local file system. 
type FileFetcher struct { paths map[string]string - resources []Resource // This is weak on mobile + resources []weak.Pointer[FileResource] } // Links implements Fetcher -func (f *FileFetcher) Links() (manifest.LinkList, error) { +func (f *FileFetcher) Links(ctx context.Context) (manifest.LinkList, error) { links := make(manifest.LinkList, 0) for href, xpath := range f.paths { axpath, err := filepath.Abs(xpath) @@ -29,7 +31,7 @@ func (f *FileFetcher) Links() (manifest.LinkList, error) { } err = filepath.WalkDir(xpath, func(apath string, d fs.DirEntry, err error) error { - if d == nil { // xpath is afile + if d == nil { // xpath is a file fi, err := os.Stat(xpath) if err != nil { return err @@ -52,7 +54,7 @@ func (f *FileFetcher) Links() (manifest.LinkList, error) { f, err := os.Open(apath) if err == nil { defer f.Close() - mt := mediatype.OfFileOnly(f) + mt := mediatype.OfFileOnly(ctx, f) if mt != nil { link.MediaType = mt } @@ -76,7 +78,7 @@ func (f *FileFetcher) Links() (manifest.LinkList, error) { } // Get implements Fetcher -func (f *FileFetcher) Get(link manifest.Link) Resource { +func (f *FileFetcher) Get(ctx context.Context, link manifest.Link) Resource { linkHref := link.Href.String() for itemHref, itemFile := range f.paths { if strings.HasPrefix(linkHref, itemHref) { @@ -92,7 +94,7 @@ func (f *FileFetcher) Get(link manifest.Link) Resource { } if strings.HasPrefix(rapath, iapath) { resource := NewFileResource(link, resourceFile) - f.resources = append(f.resources, resource) + f.resources = append(f.resources, weak.Make(resource)) return resource } } @@ -102,8 +104,11 @@ func (f *FileFetcher) Get(link manifest.Link) Resource { // Close implements Fetcher func (f *FileFetcher) Close() { + // Safety mechanism to cleanup any os.File handles still open for _, res := range f.resources { - res.Close() + if r := res.Value(); r != nil { + r.Close() + } } f.resources = nil } @@ -126,6 +131,7 @@ func (r *FileResource) Link() manifest.Link { return r.link } +// 
Properties implements Resource func (r *FileResource) Properties() manifest.Properties { return manifest.Properties{} } @@ -159,14 +165,16 @@ func (r *FileResource) open() (*os.File, *ResourceError) { return nil, NotFound(errors.New("is a directory")) } r.file = f + runtime.AddCleanup(r, func(f *os.File) { + f.Close() + }, f) return f, nil } // Read implements Resource -func (r *FileResource) Read(start int64, end int64) ([]byte, *ResourceError) { +func (r *FileResource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) { if end < start { - err := RangeNotSatisfiable(errors.New("end of range smaller than start")) - return nil, err + return nil, RangeNotSatisfiable(errors.New("end of range smaller than start")) } f, ex := r.open() if ex != nil { @@ -197,7 +205,7 @@ func (r *FileResource) Read(start int64, end int64) ([]byte, *ResourceError) { } // Stream implements Resource -func (r *FileResource) Stream(w io.Writer, start int64, end int64) (int64, *ResourceError) { +func (r *FileResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) { if end < start { err := RangeNotSatisfiable(errors.New("end of range smaller than start")) return -1, err @@ -228,7 +236,7 @@ func (r *FileResource) Stream(w io.Writer, start int64, end int64) (int64, *Reso } // Length implements Resource -func (r *FileResource) Length() (int64, *ResourceError) { +func (r *FileResource) Length(ctx context.Context) (int64, *ResourceError) { f, ex := r.open() if ex != nil { return 0, ex @@ -240,21 +248,6 @@ func (r *FileResource) Length() (int64, *ResourceError) { return fi.Size(), nil } -// ReadAsString implements Resource -func (r *FileResource) ReadAsString() (string, *ResourceError) { - return ReadResourceAsString(r) -} - -// ReadAsJSON implements Resource -func (r *FileResource) ReadAsJSON() (map[string]interface{}, *ResourceError) { - return ReadResourceAsJSON(r) -} - -// ReadAsXML implements Resource -func (r *FileResource) 
ReadAsXML(prefixes map[string]string) (*xmlquery.Node, *ResourceError) { - return ReadResourceAsXML(r, prefixes) -} - func NewFileResource(link manifest.Link, abspath string) *FileResource { return &FileResource{ link: link, diff --git a/pkg/fetcher/fetcher_file_test.go b/pkg/fetcher/fetcher_file_test.go index dfa02f64..03c32a65 100644 --- a/pkg/fetcher/fetcher_file_test.go +++ b/pkg/fetcher/fetcher_file_test.go @@ -17,27 +17,27 @@ var testFileFetcher = &FileFetcher{ } func TestFileFetcherLengthNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) - _, err := resource.Length() + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) + _, err := resource.Length(t.Context()) assert.Equal(t, NotFound(err.Cause), err) } func TestFileFetcherReadNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) - _, err := resource.Read(0, 0) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) + _, err := resource.Read(t.Context(), 0, 0) assert.Equal(t, NotFound(err.Cause), err) - _, err = resource.Stream(&bytes.Buffer{}, 0, 0) + _, err = resource.Stream(t.Context(), &bytes.Buffer{}, 0, 0) assert.Equal(t, NotFound(err.Cause), err) } func TestFileFetcherHrefInMap(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) - bin, err := resource.Read(0, 0) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) + bin, err := resource.Read(t.Context(), 0, 0) if assert.Nil(t, err) { assert.Equal(t, "text", string(bin)) } var b bytes.Buffer - n, err := resource.Stream(&b, 0, 0) + n, err := resource.Stream(t.Context(), &b, 0, 0) if assert.Nil(t, err) { assert.EqualValues(t, 4, n) 
assert.Equal(t, "text", b.String()) @@ -45,13 +45,13 @@ func TestFileFetcherHrefInMap(t *testing.T) { } func TestFileFetcherDirectoryFile(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/text1.txt", false)}) - bin, err := resource.Read(0, 0) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/text1.txt", false)}) + bin, err := resource.Read(t.Context(), 0, 0) if assert.Nil(t, err) { assert.Equal(t, "text1", string(bin)) } var b bytes.Buffer - n, err := resource.Stream(&b, 0, 0) + n, err := resource.Stream(t.Context(), &b, 0, 0) if assert.Nil(t, err) { assert.EqualValues(t, 5, n) assert.Equal(t, "text1", b.String()) @@ -59,12 +59,12 @@ func TestFileFetcherDirectoryFile(t *testing.T) { } func TestFileFetcherSubdirectoryFile(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory/text2.txt", false)}) - bin, err := resource.Read(0, 0) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory/text2.txt", false)}) + bin, err := resource.Read(t.Context(), 0, 0) assert.Nil(t, err) assert.Equal(t, "text2", string(bin)) var b bytes.Buffer - n, err := resource.Stream(&b, 0, 0) + n, err := resource.Stream(t.Context(), &b, 0, 0) if assert.Nil(t, err) { assert.EqualValues(t, 5, n) assert.Equal(t, "text2", b.String()) @@ -72,30 +72,30 @@ func TestFileFetcherSubdirectoryFile(t *testing.T) { } func TestFileFetcherDirectoryNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory", false)}) - _, err := resource.Read(0, 0) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory", false)}) + _, err := resource.Read(t.Context(), 0, 0) assert.Equal(t, NotFound(err.Cause), err) - _, err = 
resource.Stream(&bytes.Buffer{}, 0, 0) + _, err = resource.Stream(t.Context(), &bytes.Buffer{}, 0, 0) assert.Equal(t, NotFound(err.Cause), err) } func TestFileFetcherDirectoryTraversalNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/../text.txt", false)}) - _, err := resource.Read(0, 0) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/../text.txt", false)}) + _, err := resource.Read(t.Context(), 0, 0) assert.Equal(t, NotFound(err.Cause), err, "cannot traverse up a directory using '..'") - _, err = resource.Stream(&bytes.Buffer{}, 0, 0) + _, err = resource.Stream(t.Context(), &bytes.Buffer{}, 0, 0) assert.Equal(t, NotFound(err.Cause), err, "cannot traverse up a directory using '..'") } func TestFileFetcherReadRange(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) - bin, err := resource.Read(0, 2) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) + bin, err := resource.Read(t.Context(), 0, 2) if assert.Nil(t, err) { assert.Equal(t, "tex", string(bin), "read data should be the first three bytes of the file") } var b bytes.Buffer - n, err := resource.Stream(&b, 0, 2) + n, err := resource.Stream(t.Context(), &b, 0, 2) if assert.Nil(t, err) { assert.EqualValues(t, 3, n) assert.Equal(t, "tex", b.String(), "read data should be the first three bytes of the file") @@ -103,24 +103,24 @@ func TestFileFetcherReadRange(t *testing.T) { } func TestFileFetcherTwoRangesSameResource(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) - bin, err := resource.Read(0, 1) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) + bin, err := resource.Read(t.Context(), 0, 1) if 
assert.Nil(t, err) { assert.Equal(t, "te", string(bin)) } var b bytes.Buffer - n, err := resource.Stream(&b, 0, 1) + n, err := resource.Stream(t.Context(), &b, 0, 1) if assert.Nil(t, err) { assert.EqualValues(t, 2, n) assert.Equal(t, "te", b.String()) } - bin, err = resource.Read(1, 3) + bin, err = resource.Read(t.Context(), 1, 3) if assert.Nil(t, err) { assert.Equal(t, "ext", string(bin)) } b.Reset() - n, err = resource.Stream(&b, 1, 3) + n, err = resource.Stream(t.Context(), &b, 1, 3) if assert.Nil(t, err) { assert.EqualValues(t, 3, n) assert.Equal(t, "ext", b.String()) @@ -128,13 +128,13 @@ func TestFileFetcherTwoRangesSameResource(t *testing.T) { } func TestFileFetcherOutOfRangeClamping(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) - bin, err := resource.Read(-5, 60) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) + bin, err := resource.Read(t.Context(), -5, 60) if assert.Nil(t, err) { assert.Equal(t, "text", string(bin)) } var b bytes.Buffer - n, err := resource.Stream(&b, -5, 60) + n, err := resource.Stream(t.Context(), &b, -5, 60) if assert.Nil(t, err) { assert.EqualValues(t, 4, n) assert.Equal(t, "text", b.String()) @@ -142,38 +142,38 @@ func TestFileFetcherOutOfRangeClamping(t *testing.T) { } func TestFileFetcherDecreasingRange(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) - _, err := resource.Read(60, 20) + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) + _, err := resource.Read(t.Context(), 60, 20) if assert.Error(t, err) { assert.Equal(t, RangeNotSatisfiable(err.Cause), err, "range isn't satisfiable") } - _, err = resource.Stream(&bytes.Buffer{}, 60, 20) + _, err = resource.Stream(t.Context(), &bytes.Buffer{}, 60, 20) if assert.Error(t, err) { assert.Equal(t, 
RangeNotSatisfiable(err.Cause), err, "range isn't satisfiable") } } func TestFileFetcherComputingLength(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) - length, err := resource.Length() + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("file_href", false)}) + length, err := resource.Length(t.Context()) assert.Nil(t, err) assert.EqualValues(t, 4, length) } func TestFileFetcherDirectoryLengthNotFound(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory", false)}) - _, err := resource.Length() + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("dir_href/subdirectory", false)}) + _, err := resource.Length(t.Context()) assert.Equal(t, NotFound(err.Cause), err) } func TestFileFetcherFileNotFoundLength(t *testing.T) { - resource := testFileFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) - _, err := resource.Length() + resource := testFileFetcher.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("unknown", false)}) + _, err := resource.Length(t.Context()) assert.Equal(t, NotFound(err.Cause), err) } func TestFileFetcherLinks(t *testing.T) { - links, err := testFileFetcher.Links() + links, err := testFileFetcher.Links(t.Context()) assert.Nil(t, err) mustContain := manifest.LinkList{{ diff --git a/pkg/fetcher/fetcher_gcs.go b/pkg/fetcher/fetcher_gcs.go new file mode 100644 index 00000000..32df770d --- /dev/null +++ b/pkg/fetcher/fetcher_gcs.go @@ -0,0 +1,257 @@ +package fetcher + +import ( + "context" + "errors" + "io" + "net/http" + "path" + "strings" + + "cloud.google.com/go/storage" + "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" + "google.golang.org/api/googleapi" + 
"google.golang.org/api/iterator" +) + +type GCSFetcher struct { + href string + client *storage.Client + handle *storage.ObjectHandle + + cachedLinks manifest.LinkList +} + +func NewGCSFetcher(href string, client *storage.Client, handle *storage.ObjectHandle) *GCSFetcher { + if client == nil || handle == nil { + panic("GCSFetcher requires a non-nil client and handle") + } + return &GCSFetcher{ + client: client, + href: href, + handle: handle, + } +} + +// Links implements Fetcher +func (f *GCSFetcher) Links(ctx context.Context) (manifest.LinkList, error) { + if len(f.cachedLinks) > 0 { + return f.cachedLinks, nil + } + + prefix := f.handle.ObjectName() + if !strings.HasSuffix(prefix, "/") { + prefix += "/" + } + + // List all items in the "folder" + it := f.client.Bucket(f.handle.BucketName()).Objects(ctx, &storage.Query{ + Prefix: prefix, + Delimiter: "/", + }) + it.PageInfo().MaxSize = 1000 // Should be enough. We can see about increasing this based on implementer feedback. + itemAttrs, err := it.Next() + if err == nil { + f.cachedLinks = make(manifest.LinkList, 0, it.PageInfo().Remaining()+1) + processItem := func(item *storage.ObjectAttrs) error { + if item.Size == 0 { + return nil + } + + href, err := manifest.NewHREFFromString(path.Join(f.href, strings.TrimPrefix(item.Name, prefix)), false) + if err != nil { + return err + } + link := manifest.Link{ + Href: href, + } + + ext := path.Ext(item.Name) + if ext != "" { + mt := mediatype.OfExtension(ext[1:]) + if mt != nil { + link.MediaType = mt + } + } + f.cachedLinks = append(f.cachedLinks, link) + return nil + } + if err := processItem(itemAttrs); err != nil { + return nil, err + } + for { + itemAttrs, err = it.Next() + if err == iterator.Done { + break + } else if err != nil { + return nil, err + } + if err := processItem(itemAttrs); err != nil { + return nil, err + } + } + } else if err == iterator.Done { + // Empty directory + if strings.HasSuffix(f.handle.ObjectName(), "/") { + return f.cachedLinks, nil + } 
+ + ext := path.Ext(f.handle.ObjectName()) + if ext != "" { + ext = ext[1:] + } + mt := mediatype.OfExtension(ext) + if mt == nil { + mt = &mediatype.Binary + } + + // Not a directory, just a single file + f.cachedLinks = manifest.LinkList{{ + Href: manifest.NewHREF(url.MustURLFromString(f.href)), + MediaType: mt, + }} + } else { + // Something else than EOF + return nil, err + } + + return f.cachedLinks, nil +} + +// Get implements Fetcher +func (f *GCSFetcher) Get(ctx context.Context, link manifest.Link) Resource { + linkHref := link.Href.String() + if strings.HasPrefix(linkHref, f.href) { + resourceFile := path.Join(f.handle.ObjectName(), strings.TrimPrefix(linkHref, f.href)) + return &gcsResource{ + handle: f.client.Bucket(f.handle.BucketName()).Object(resourceFile), + link: link, + } + } + + return NewFailureResource(link, NotFound(errors.New("couldn't find "+linkHref+" in GCSFetcher paths"))) +} + +func (f *GCSFetcher) Close() { + // No-op for GCS +} + +// Resource from GCS +type gcsResource struct { + link manifest.Link + handle *storage.ObjectHandle + cachedAttrs *storage.ObjectAttrs +} + +// Link implements Resource +func (r *gcsResource) Link() manifest.Link { + return r.link +} + +// Properties implements Resource +func (r *gcsResource) Properties() manifest.Properties { + return manifest.Properties{} +} + +// Close implements Resource +func (r *gcsResource) Close() { + // No-op for GCS +} + +// File implements Resource +func (r *gcsResource) File() string { + return "" +} + +func (r *gcsResource) attrs(ctx context.Context) (*storage.ObjectAttrs, *ResourceError) { + if r.cachedAttrs == nil { + head, err := r.handle.Attrs(ctx) + if err != nil { + return nil, gcsErrorToException(err) + } + r.cachedAttrs = head + } + return r.cachedAttrs, nil +} + +// Read implements Resource +func (r *gcsResource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) { + if end < start { + return nil, RangeNotSatisfiable(errors.New("end of range 
smaller than start")) + } + + var rdr *storage.Reader + var err error + if start == 0 && end == 0 { + rdr, err = r.handle.NewReader(ctx) + } else { + rdr, err = r.handle.NewRangeReader(ctx, start, end-start+1) + } + if err != nil { + return nil, gcsErrorToException(err) + } + defer rdr.Close() + + var data []byte + if rdr.Remain() >= 0 { + data = make([]byte, rdr.Remain()) + _, err = io.ReadFull(rdr, data) + } else { + data, err = io.ReadAll(rdr) + } + if err != nil { + return nil, Other(err) + } + return data, nil +} + +// Stream implements Resource +func (r *gcsResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) { + if end < start { + return -1, RangeNotSatisfiable(errors.New("end of range smaller than start")) + } + + var rdr *storage.Reader + var err error + if start == 0 && end == 0 { + rdr, err = r.handle.NewReader(ctx) + } else { + rdr, err = r.handle.NewRangeReader(ctx, start, end-start+1) + } + if err != nil { + return -1, gcsErrorToException(err) + } + defer rdr.Close() + + n, err := io.Copy(w, rdr) + if err != nil { + return -1, Other(err) + } + return n, nil +} + +// Length implements Resource +func (r *gcsResource) Length(ctx context.Context) (int64, *ResourceError) { + attrs, rerr := r.attrs(ctx) + if rerr != nil { + return 0, rerr + } + return attrs.Size, nil +} + +func gcsErrorToException(err error) *ResourceError { + if gErr, ok := err.(*googleapi.Error); ok { + switch gErr.Code { + case http.StatusNotFound: + return NotFound(err) + case http.StatusForbidden: + return Forbidden(err) + case http.StatusBadRequest: + return BadRequest(err) + } + } + + return Other(err) +} diff --git a/pkg/fetcher/fetcher_http.go b/pkg/fetcher/fetcher_http.go new file mode 100644 index 00000000..18244f0f --- /dev/null +++ b/pkg/fetcher/fetcher_http.go @@ -0,0 +1,257 @@ +package fetcher + +import ( + "context" + "io" + "net/http" + "path" + "slices" + "strconv" + "strings" + + "github.com/pkg/errors" + 
"github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" +) + +type HTTPFetcher struct { + href string + client *http.Client + url url.AbsoluteURL +} + +func NewHTTPFetcher(href string, client *http.Client, url url.AbsoluteURL) *HTTPFetcher { + if client == nil { + panic("HTTPFetcher requires a non-nil client") + } + return &HTTPFetcher{ + href: href, + client: client, + url: url, + } +} + +// Links implements Fetcher +func (f *HTTPFetcher) Links(ctx context.Context) (manifest.LinkList, error) { + // It's impossible to determine what the items in a folder are on a remote HTTP server + // This limits the parsers' abilities to realize that a folder is a certain type of publication + if strings.HasSuffix(f.url.Path(), "/") { + // Folder + return manifest.LinkList{{ + Href: manifest.NewHREF(url.MustURLFromString(f.href)), + MediaType: &mediatype.Binary, + }}, nil + } + + // No slash, assume a file + ext := path.Ext(f.url.Filename()) + if ext != "" { + ext = ext[1:] + } + mt := mediatype.OfExtension(ext) + if mt == nil { + mt = &mediatype.Binary + } + + return manifest.LinkList{{ + Href: manifest.NewHREF(url.MustURLFromString(f.href)), + MediaType: mt, + }}, nil +} + +// Get implements Fetcher +func (f *HTTPFetcher) Get(ctx context.Context, link manifest.Link) Resource { + linkHref := link.Href.String() + if strings.HasPrefix(linkHref, f.href) { + rurl, err := url.RelativeURLFromString(strings.TrimPrefix(linkHref, f.href)) + if err == nil { + return &httpResource{ + link: link, + client: f.client, + url: f.url.Resolve(rurl).(url.AbsoluteURL), + } + } + } + + return NewFailureResource(link, NotFound(errors.New("couldn't find "+linkHref+" in HTTPFetcher paths"))) +} + +func (f *HTTPFetcher) Close() { + // No-op for HTTP +} + +// Resource from HTTP +type httpResource struct { + link manifest.Link + client *http.Client + url url.AbsoluteURL + + cachedSize *int64 +} + +// Link implements 
Resource +func (r *httpResource) Link() manifest.Link { + return r.link +} + +// Properties implements Resource +func (r *httpResource) Properties() manifest.Properties { + return manifest.Properties{} +} + +// Close implements Resource +func (r *httpResource) Close() { + // No-op for HTTP +} + +// File implements Resource +func (r *httpResource) File() string { + return "" +} + +func (r *httpResource) size(ctx context.Context) (int64, *ResourceError) { + if r.cachedSize == nil { + req, err := http.NewRequestWithContext(ctx, http.MethodHead, r.url.String(), nil) + if err != nil { + return 0, Other(err) + } + resp, err := r.client.Do(req) + if err != nil { + return 0, Other(err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return 0, httpStatusToException(resp.StatusCode) + } + + // HTTP server *must* support byte range requests + arvs := resp.Header.Values("Accept-Ranges") + if !slices.Contains(arvs, "bytes") { + return 0, Other(errors.New("HTTP server does not support byte range requests")) + } + + // HTTP server *must* return Content-Length header + lengthStr := resp.Header.Get("Content-Length") + if lengthStr == "" { + return 0, Other(errors.New("HTTP server did not return Content-Length header")) + } + length, err := strconv.ParseInt(lengthStr, 10, 64) + if err != nil { + return 0, Other(errors.Wrap(err, "failed to parse Content-Length header")) + } + r.cachedSize = &length + + } + return *r.cachedSize, nil +} + +// Read implements Resource +func (r *httpResource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) { + if end < start { + return nil, RangeNotSatisfiable(errors.New("end of range smaller than start")) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, r.url.String(), nil) + if err != nil { + return nil, Other(err) + } + + if start != 0 || end != 0 { + var sb strings.Builder + sb.WriteString("bytes=") + sb.WriteString(strconv.FormatInt(start, 10)) + sb.WriteString("-") + if end > 0 { 
+ sb.WriteString(strconv.FormatInt(end, 10)) + } + req.Header.Set("Range", sb.String()) + } + resp, err := r.client.Do(req) + if err != nil { + return nil, Other(err) + } + if resp.StatusCode != http.StatusPartialContent { + ex := httpStatusToException(resp.StatusCode) + if ex == nil { + return nil, Other(errors.New("unexpected HTTP status code: " + strconv.Itoa(resp.StatusCode))) + } + return nil, ex + } + defer resp.Body.Close() + + var data []byte + if resp.ContentLength >= 0 { + data = make([]byte, resp.ContentLength) + _, err = io.ReadFull(resp.Body, data) + } else { + data, err = io.ReadAll(resp.Body) + } + if err != nil { + return nil, Other(err) + } + return data, nil +} + +// Stream implements Resource +func (r *httpResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) { + if end < start { + return -1, RangeNotSatisfiable(errors.New("end of range smaller than start")) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, r.url.String(), nil) + if err != nil { + return -1, Other(err) + } + + if start != 0 || end != 0 { + var sb strings.Builder + sb.WriteString("bytes=") + sb.WriteString(strconv.FormatInt(start, 10)) + sb.WriteString("-") + if end > 0 { + sb.WriteString(strconv.FormatInt(end, 10)) + } + req.Header.Set("Range", sb.String()) + } + resp, err := r.client.Do(req) + if err != nil { + return -1, Other(err) + } + if resp.StatusCode != http.StatusPartialContent { + ex := httpStatusToException(resp.StatusCode) + if ex == nil { + return -1, Other(errors.New("unexpected HTTP status code: " + strconv.Itoa(resp.StatusCode))) + } + return -1, ex + } + defer resp.Body.Close() + + n, err := io.Copy(w, resp.Body) + if err != nil { + return -1, Other(err) + } + return n, nil +} + +// Length implements Resource +func (r *httpResource) Length(ctx context.Context) (int64, *ResourceError) { + size, rerr := r.size(ctx) + if rerr != nil { + return 0, rerr + } + return size, nil +} + +func 
httpStatusToException(status int) *ResourceError { + if status == 0 { + return nil + } + + switch status { + case http.StatusOK, http.StatusCreated, http.StatusAccepted, http.StatusPartialContent, http.StatusNoContent, http.StatusResetContent, http.StatusNotModified: + return nil + default: + return NewResourceError(ResourceErrorCode(status)) + } +} diff --git a/pkg/fetcher/fetcher_s3.go b/pkg/fetcher/fetcher_s3.go new file mode 100644 index 00000000..d44b84f2 --- /dev/null +++ b/pkg/fetcher/fetcher_s3.go @@ -0,0 +1,279 @@ +package fetcher + +import ( + "context" + "errors" + "io" + "path" + "strconv" + "strings" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/aws/smithy-go" + "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/mediatype" + "github.com/readium/go-toolkit/pkg/util/url" +) + +type S3Fetcher struct { + href string + client *s3.Client + bucket string + key string + + cachedLinks manifest.LinkList +} + +func NewS3Fetcher(href string, client *s3.Client, bucket, key string) *S3Fetcher { + if client == nil { + panic("S3Fetcher requires a non-nil client") + } + return &S3Fetcher{ + href: href, + client: client, + bucket: bucket, + key: key, + } +} + +// Links implements Fetcher +func (f *S3Fetcher) Links(ctx context.Context) (manifest.LinkList, error) { + if len(f.cachedLinks) > 0 { + return f.cachedLinks, nil + } + + prefix := f.key + if !strings.HasSuffix(prefix, "/") { + prefix += "/" + } + + // List all items in the "folder" + out, err := f.client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{ + Bucket: &f.bucket, + Prefix: &prefix, + // MaxKeys is omitted, can list up to 1000 files by default. Should be enough + // and serve as a sanity check. We can see about increasing this based on implementer feedback. 
+ }) + if err != nil { + return nil, err + } + if len(out.Contents) > 0 { + f.cachedLinks = make(manifest.LinkList, len(out.Contents)) + for i, v := range out.Contents { + if v.Size != nil && *v.Size == 0 { + continue + } + + href, err := manifest.NewHREFFromString(path.Join(f.href, strings.TrimPrefix(*v.Key, prefix)), false) + if err != nil { + return nil, err + } + f.cachedLinks[i].Href = href + + ext := path.Ext(*v.Key) + f.cachedLinks[i].MediaType = &mediatype.Binary + if ext != "" { + mt := mediatype.OfExtension(ext[1:]) + if mt != nil { + f.cachedLinks[i].MediaType = mt + } + } + } + } else { + // Empty directory + if strings.HasSuffix(f.key, "/") { + return f.cachedLinks, nil + } + + ext := path.Ext(f.key) + if ext != "" { + ext = ext[1:] + } + mt := mediatype.OfExtension(ext) + if mt == nil { + mt = &mediatype.Binary + } + + // Not a directory, just a single file + f.cachedLinks = manifest.LinkList{{ + Href: manifest.NewHREF(url.MustURLFromString(f.href)), + MediaType: mt, + }} + } + + return f.cachedLinks, nil +} + +// Get implements Fetcher +func (f *S3Fetcher) Get(ctx context.Context, link manifest.Link) Resource { + linkHref := link.Href.String() + if strings.HasPrefix(linkHref, f.href) { + resourceFile := path.Join(f.key, strings.TrimPrefix(linkHref, f.href)) + return &s3Resource{ + link: link, + client: f.client, + bucket: f.bucket, + key: resourceFile, + } + } + + return NewFailureResource(link, NotFound(errors.New("couldn't find "+linkHref+" in S3Fetcher paths"))) +} + +func (f *S3Fetcher) Close() { + // No-op for S3 +} + +// Resource from S3 +type s3Resource struct { + link manifest.Link + client *s3.Client + bucket string + key string + + cachedHead *s3.HeadObjectOutput +} + +// Link implements Resource +func (r *s3Resource) Link() manifest.Link { + return r.link +} + +// Properties implements Resource +func (r *s3Resource) Properties() manifest.Properties { + return manifest.Properties{} +} + +// Close implements Resource +func (r *s3Resource) 
Close() { + // No-op for S3 +} + +// File implements Resource +func (r *s3Resource) File() string { + return "" +} + +func (r *s3Resource) object() *s3.GetObjectInput { + return &s3.GetObjectInput{ + Bucket: &r.bucket, + Key: &r.key, + } +} + +func (r *s3Resource) head(ctx context.Context) (*s3.HeadObjectOutput, *ResourceError) { + if r.cachedHead == nil { + head, err := r.client.HeadObject(ctx, &s3.HeadObjectInput{ + Bucket: &r.bucket, + Key: &r.key, + }) + if err != nil { + return nil, awsErrorToException(err) + } + r.cachedHead = head + } + return r.cachedHead, nil +} + +// Read implements Resource +func (r *s3Resource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) { + if end < start { + return nil, RangeNotSatisfiable(errors.New("end of range smaller than start")) + } + + obj := r.object() + if start != 0 || end != 0 { + var sb strings.Builder + sb.WriteString("bytes=") + sb.WriteString(strconv.FormatInt(start, 10)) + sb.WriteString("-") + if end > 0 { + sb.WriteString(strconv.FormatInt(end, 10)) + } + obj.Range = aws.String(sb.String()) + } + + output, err := r.client.GetObject(ctx, r.object()) + if err != nil { + return nil, awsErrorToException(err) + } + defer output.Body.Close() + + var data []byte + if output.ContentLength != nil && *output.ContentLength >= 0 { + data = make([]byte, *output.ContentLength) + _, err = io.ReadFull(output.Body, data) + } else { + data, err = io.ReadAll(output.Body) + } + if err != nil { + return nil, Other(err) + } + return data, nil +} + +// Stream implements Resource +func (r *s3Resource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) { + if end < start { + return -1, RangeNotSatisfiable(errors.New("end of range smaller than start")) + } + + obj := r.object() + if start != 0 || end != 0 { + var sb strings.Builder + sb.WriteString("bytes=") + sb.WriteString(strconv.FormatInt(start, 10)) + sb.WriteString("-") + if end > 0 { + 
sb.WriteString(strconv.FormatInt(end, 10)) + } + obj.Range = aws.String(sb.String()) + } + + output, err := r.client.GetObject(ctx, obj) + if err != nil { + return -1, awsErrorToException(err) + } + defer output.Body.Close() + + n, err := io.Copy(w, output.Body) + if err != nil { + return -1, Other(err) + } + return n, nil +} + +// Length implements Resource +func (r *s3Resource) Length(ctx context.Context) (int64, *ResourceError) { + head, rerr := r.head(ctx) + if rerr != nil { + return 0, rerr + } + if head.ContentLength == nil { + return 0, Other(errors.New("object does not have length")) + } + return *head.ContentLength, nil +} + +func awsErrorToException(err error) *ResourceError { + var notFound *types.NotFound + var noSuchKey *types.NoSuchKey + var noSuchBucket *types.NoSuchBucket + var invalidObjectState *types.InvalidObjectState + if errors.As(err, ¬Found) || errors.As(err, &noSuchKey) || errors.As(err, &noSuchBucket) { + return NotFound(err) + } else if errors.As(err, &invalidObjectState) { + return BadRequest(err) + } else { + var ae smithy.APIError + if errors.As(err, &ae) { + if ae.ErrorCode() == "AccessDenied" { + return Forbidden(err) + } + } + } + + return Other(err) +} diff --git a/pkg/fetcher/fetcher_transforming.go b/pkg/fetcher/fetcher_transforming.go index 5ab7ea54..268739e0 100644 --- a/pkg/fetcher/fetcher_transforming.go +++ b/pkg/fetcher/fetcher_transforming.go @@ -1,6 +1,10 @@ package fetcher -import "github.com/readium/go-toolkit/pkg/manifest" +import ( + "context" + + "github.com/readium/go-toolkit/pkg/manifest" +) // Transforms the resources' content of a child fetcher using a list of [ResourceTransformer] functions. 
type TransformingFetcher struct { @@ -9,13 +13,13 @@ type TransformingFetcher struct { } // Links implements Fetcher -func (f *TransformingFetcher) Links() (manifest.LinkList, error) { - return f.fetcher.Links() +func (f *TransformingFetcher) Links(ctx context.Context) (manifest.LinkList, error) { + return f.fetcher.Links(ctx) } // Get implements Fetcher -func (f *TransformingFetcher) Get(link manifest.Link) Resource { - resource := f.fetcher.Get(link) +func (f *TransformingFetcher) Get(ctx context.Context, link manifest.Link) Resource { + resource := f.fetcher.Get(ctx, link) for _, transformer := range f.transformers { resource = transformer(resource) } diff --git a/pkg/fetcher/fs.go b/pkg/fetcher/fs.go new file mode 100644 index 00000000..90be8eb1 --- /dev/null +++ b/pkg/fetcher/fs.go @@ -0,0 +1,176 @@ +package fetcher + +import ( + "context" + "errors" + "io" + "io/fs" + "path" + "sync/atomic" + "time" + + "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/util/url" +) + +type resourceInfo struct { + Resource + length int64 +} + +// IsDir implements [fs.FileInfo] +func (r resourceInfo) IsDir() bool { + return false +} + +// ModTime implements [fs.FileInfo] +func (r resourceInfo) ModTime() time.Time { + return time.Time{} // Zero time +} + +// Mode implements [fs.FileInfo] +func (r resourceInfo) Mode() fs.FileMode { + return 0444 // Read-only +} + +// Name implements [fs.FileInfo] +func (r resourceInfo) Name() string { + return path.Base(r.Resource.Link().Href.String()) +} + +// Size implements [fs.FileInfo] +func (r resourceInfo) Size() int64 { + return r.length +} + +// Sys implements [fs.FileInfo] +func (r resourceInfo) Sys() any { + return r.Resource +} + +type fsResource struct { + r Resource + offset atomic.Int64 + ctx context.Context +} + +// Close implements [fs.File] +func (f *fsResource) Close() error { + f.r.Close() + return nil +} + +// ReadAt implements [io.ReaderAt] +func (f *fsResource) ReadAt(b []byte, off int64) 
(int, error) { + bin, rerr := f.r.Read(f.ctx, off, off+int64(len(b))-1) + if rerr != nil { + if rerr.Cause == io.EOF { + copy(b, bin) + return len(bin), io.EOF + } + return len(bin), rerr + } + return copy(b, bin), nil +} + +// Seek implements [io.Seeker] +func (f *fsResource) Seek(offset int64, whence int) (int64, error) { + switch whence { + case io.SeekStart: + f.offset.Store(offset) + return offset, nil + case io.SeekCurrent: + return f.offset.Add(offset), nil + case io.SeekEnd: + length, err := f.r.Length(f.ctx) + if err != nil { + return length, err + } + newOffset := length + offset + f.offset.Store(newOffset) + return newOffset, nil + default: + return -1, errors.New("invalid whence") + } +} + +// Read implements [fs.File] +func (f *fsResource) Read(b []byte) (int, error) { + blen := int64(len(b)) + currentOffset := f.offset.Add(blen) - blen + bin, rerr := f.r.Read(f.ctx, currentOffset, currentOffset+blen-1) + if rerr != nil { + if rerr.Cause == io.EOF { + copy(b, bin) + return len(bin), io.EOF + } + return len(bin), rerr + } + return copy(b, bin), nil +} + +// Stat implements [fs.File] +func (f *fsResource) Stat() (fs.FileInfo, error) { + length, err := f.r.Length(f.ctx) + if err != nil { + return nil, err + } + + return resourceInfo{ + Resource: f.r, + length: length, + }, nil +} + +// TODO: directory listing support +type fsFetcher struct { + Fetcher + ctx context.Context +} + +func (f fsFetcher) get(name string) (Resource, error) { + u, err := url.URLFromString(name) + if err != nil { + return nil, err + } + + return f.Get(f.ctx, manifest.Link{Href: manifest.NewHREF(u)}), nil +} + +// Stat implements [fs.StatFS] +func (f fsFetcher) Stat(name string) (fs.FileInfo, error) { + r, err := f.get(name) + if err != nil { + return nil, err + } + + length, rerr := r.Length(f.ctx) + if rerr != nil { + return nil, rerr + } + + return resourceInfo{ + Resource: r, + length: length, + }, nil +} + +// Open implements [fs.FS] +func (f fsFetcher) Open(name string) 
(fs.File, error) { + r, err := f.get(name) + if err != nil { + return nil, err + } + + return &fsResource{r: r, ctx: f.ctx}, nil +} + +// Turn a [Fetcher] into a [fs.FS] filesystem +func ToFS(ctx context.Context, f Fetcher) fsFetcher { + return fsFetcher{f, ctx} +} + +// Turn a [Resource] into a [fs.File] virtual file +func ToFSFile(ctx context.Context, r Resource) fs.File { + return &fsResource{r: r, ctx: ctx} +} diff --git a/pkg/fetcher/reader.go b/pkg/fetcher/reader.go index 4f3f1707..b020df20 100644 --- a/pkg/fetcher/reader.go +++ b/pkg/fetcher/reader.go @@ -1,6 +1,7 @@ package fetcher import ( + "context" "errors" ) @@ -34,7 +35,7 @@ func (rs *ResourceReadSeeker) Seek(offset int64, whence int) (int64, error) { return rs.offset, nil case 2: if rs.length == 0 { - length, errx := rs.r.Length() + length, errx := rs.r.Length(context.TODO()) if errx != nil { return 0, errx } @@ -52,7 +53,7 @@ func (rs *ResourceReadSeeker) Seek(offset int64, whence int) (int64, error) { // Seek implements io.ReadSeeker func (rs *ResourceReadSeeker) Read(p []byte) (n int, err error) { - bin, errx := rs.r.Read(rs.offset, rs.offset+int64(len(p))) + bin, errx := rs.r.Read(context.TODO(), rs.offset, rs.offset+int64(len(p))) if errx != nil { err = errx return diff --git a/pkg/fetcher/resource.go b/pkg/fetcher/resource.go index 656654ba..f76d8970 100644 --- a/pkg/fetcher/resource.go +++ b/pkg/fetcher/resource.go @@ -1,6 +1,7 @@ package fetcher import ( + "context" "encoding/json" "encoding/xml" "errors" @@ -47,29 +48,19 @@ type Resource interface { // Returns data length from metadata if available, or calculated from reading the bytes otherwise. // This value must be treated as a hint, as it might not reflect the actual bytes length. To get the real length, you need to read the whole resource. - Length() (int64, *ResourceError) + Length(ctx context.Context) (int64, *ResourceError) // Reads the bytes at the given range. - // When start and end are null, the whole content is returned. 
Out-of-range indexes are clamped to the available length automatically. - Read(start int64, end int64) ([]byte, *ResourceError) + // When start and end are zero, the whole content is returned. Out-of-range indexes are clamped to the available length automatically. + Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) // Stream the bytes at the given range to a writer. - // When start and end are null, the whole content is returned. Out-of-range indexes are clamped to the available length automatically. - Stream(w io.Writer, start int64, end int64) (int64, *ResourceError) - - // Reads the full content as a string. - // Assumes UTF-8 encoding if no Link charset is given - ReadAsString() (string, *ResourceError) - - // Reads the full content as a JSON object. - ReadAsJSON() (map[string]interface{}, *ResourceError) - - // Reads the full content as a generic XML document. - ReadAsXML(prefixes map[string]string) (*xmlquery.Node, *ResourceError) + // When start and end are zero, the whole content is returned. Out-of-range indexes are clamped to the available length automatically. 
// Read implements Resource.
// It never yields data: whatever ctx, start and end are, the result is a nil
// slice together with the error stored in the FailureResource (r.ex).
func (r FailureResource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) {
	return nil, r.ex
}
// Stream implements Resource.
// The call is forwarded unchanged (same ctx, writer and range) to the wrapped
// resource r.Res, and its result is returned as-is.
func (r ProxyResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) {
	return r.Res.Stream(ctx, w, start, end)
}
+343,48 @@ func (r ProxyResource) CompressedAs(compressionMethod archive.CompressionMethod) } // CompressedLength implements CompressedResource -func (r ProxyResource) CompressedLength() int64 { +func (r ProxyResource) CompressedLength(ctx context.Context) int64 { cres, ok := r.Res.(CompressedResource) if !ok { return -1 } - return cres.CompressedLength() + return cres.CompressedLength(ctx) } // StreamCompressed implements CompressedResource -func (r ProxyResource) StreamCompressed(w io.Writer) (int64, *ResourceError) { +func (r ProxyResource) StreamCompressed(ctx context.Context, w io.Writer) (int64, *ResourceError) { cres, ok := r.Res.(CompressedResource) if !ok { return -1, Other(errors.New("resource is not compressed")) } - return cres.StreamCompressed(w) + return cres.StreamCompressed(ctx, w) } // StreamCompressedGzip implements CompressedResource -func (r ProxyResource) StreamCompressedGzip(w io.Writer) (int64, *ResourceError) { +func (r ProxyResource) StreamCompressedGzip(ctx context.Context, w io.Writer) (int64, *ResourceError) { cres, ok := r.Res.(CompressedResource) if !ok { return -1, Other(errors.New("resource is not compressed")) } - return cres.StreamCompressedGzip(w) + return cres.StreamCompressedGzip(ctx, w) } // ReadCompressed implements CompressedResource -func (r ProxyResource) ReadCompressed() ([]byte, *ResourceError) { +func (r ProxyResource) ReadCompressed(ctx context.Context) ([]byte, *ResourceError) { cres, ok := r.Res.(CompressedResource) if !ok { return nil, Other(errors.New("resource is not compressed")) } - return cres.ReadCompressed() + return cres.ReadCompressed(ctx) } // ReadCompressedGzip implements CompressedResource -func (r ProxyResource) ReadCompressedGzip() ([]byte, *ResourceError) { +func (r ProxyResource) ReadCompressedGzip(ctx context.Context) ([]byte, *ResourceError) { cres, ok := r.Res.(CompressedResource) if !ok { return nil, Other(errors.New("resource is not compressed")) } - return cres.ReadCompressedGzip() + return 
cres.ReadCompressedGzip(ctx) } /** @@ -434,12 +396,222 @@ func (r ProxyResource) ReadCompressedGzip() ([]byte, *ResourceError) { */ type TransformingResource struct { resource Resource + transform func([]byte) []byte cacheBytes bool _bytes []byte } -// TODO TransformingResource +func NewTransformingResource(resource Resource, cacheBytes bool, transform func([]byte) []byte) *TransformingResource { + return &TransformingResource{ + resource: resource, + cacheBytes: cacheBytes, + transform: transform, + } +} + +func (r *TransformingResource) bytes(ctx context.Context) ([]byte, *ResourceError) { + if len(r._bytes) > 0 { + return r._bytes, nil + } + bin, err := r.resource.Read(ctx, 0, 0) + if err != nil { + return nil, err + } + bytes := r.transform(bin) + if len(bytes) == 0 { + return nil, Other(errors.New("TransformingResource has empty bytes")) + } + + if r.cacheBytes { + r._bytes = bytes + } + return bytes, nil +} + +// File implements Resource +func (r *TransformingResource) File() string { + return r.resource.File() +} + +// Close implements Resource +func (r *TransformingResource) Close() { + r.resource.Close() +} + +// Link implements Resource +func (r *TransformingResource) Link() manifest.Link { + return r.resource.Link() +} + +func (r *TransformingResource) Properties() manifest.Properties { + return r.resource.Properties() +} + +// Length implements Resource +func (r *TransformingResource) Length(ctx context.Context) (int64, *ResourceError) { + if r.cacheBytes { + return int64(len(r._bytes)), nil + } + l, ex := r.resource.Length(ctx) + if ex != nil { + return 0, ex + } + return l, nil +} + +// Read implements Resource +func (r *TransformingResource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) { + bytes, err := r.bytes(ctx) + if err != nil { + return nil, err + } + if start == 0 && end == 0 { + return bytes, nil + } + + // Bounds check + length := int64(len(bytes)) + if start > length { + start = length + } + if end > (length - 
1) { + end = length - 1 + } + + return bytes[start : end+1], nil +} + +// Stream implements Resource +func (r *TransformingResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) { + bytes, err := r.bytes(ctx) + if err != nil { + return 0, err + } + if start == 0 && end == 0 { + n, nerr := w.Write(bytes) + return int64(n), Other(nerr) + } + + // Bounds check + length := int64(len(bytes)) + if start > length { + start = length + } + if end > (length - 1) { + end = length - 1 + } + + n, nerr := w.Write(bytes[start : end+1]) + return int64(n), Other(nerr) +} + +// Wraps a [Resource] which will be created only when first accessing one of its members. +type LazyResource struct { + _resource Resource + factory func() Resource +} + +func NewLazyResource(factory func() Resource) *LazyResource { + return &LazyResource{ + factory: factory, + } +} -// TODO LazyResource +func (r *LazyResource) resource() Resource { + if r._resource == nil { + r._resource = r.factory() + } + return r._resource +} + +// File implements Resource +func (r *LazyResource) File() string { + return r.resource().File() +} + +// Close implements Resource +func (r *LazyResource) Close() { + if r._resource != nil { + r.resource().Close() + } +} + +// Link implements Resource +func (r *LazyResource) Link() manifest.Link { + return r.resource().Link() +} + +func (r *LazyResource) Properties() manifest.Properties { + return r.resource().Properties() +} + +// Length implements Resource +func (r *LazyResource) Length(ctx context.Context) (int64, *ResourceError) { + return r.resource().Length(ctx) +} + +// Read implements Resource +func (r *LazyResource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) { + return r.resource().Read(ctx, start, end) +} + +// Stream implements Resource +func (r *LazyResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) { + return r.resource().Stream(ctx, w, start, end) 
+} + +// CompressedAs implements CompressedResource +func (r *LazyResource) CompressedAs(compressionMethod archive.CompressionMethod) bool { + cres, ok := r.resource().(CompressedResource) + if !ok { + return false + } + return cres.CompressedAs(compressionMethod) +} + +// CompressedLength implements CompressedResource +func (r *LazyResource) CompressedLength(ctx context.Context) int64 { + cres, ok := r.resource().(CompressedResource) + if !ok { + return -1 + } + return cres.CompressedLength(ctx) +} + +// StreamCompressed implements CompressedResource +func (r *LazyResource) StreamCompressed(ctx context.Context, w io.Writer) (int64, *ResourceError) { + cres, ok := r.resource().(CompressedResource) + if !ok { + return -1, Other(errors.New("resource is not compressed")) + } + return cres.StreamCompressed(ctx, w) +} + +// StreamCompressedGzip implements CompressedResource +func (r *LazyResource) StreamCompressedGzip(ctx context.Context, w io.Writer) (int64, *ResourceError) { + cres, ok := r.resource().(CompressedResource) + if !ok { + return -1, Other(errors.New("resource is not compressed")) + } + return cres.StreamCompressedGzip(ctx, w) +} + +// ReadCompressed implements CompressedResource +func (r *LazyResource) ReadCompressed(ctx context.Context) ([]byte, *ResourceError) { + cres, ok := r.resource().(CompressedResource) + if !ok { + return nil, Other(errors.New("resource is not compressed")) + } + return cres.ReadCompressed(ctx) +} + +// ReadCompressedGzip implements CompressedResource +func (r *LazyResource) ReadCompressedGzip(ctx context.Context) ([]byte, *ResourceError) { + cres, ok := r.resource().(CompressedResource) + if !ok { + return nil, Other(errors.New("resource is not compressed")) + } + return cres.ReadCompressedGzip(ctx) +} -// TODO BufferingResource +// TODO FallbackResource, SynchronizedResource, BufferingResource diff --git a/pkg/fetcher/resource_bytes.go b/pkg/fetcher/resource_bytes.go index 9b0b592e..8fa28eff 100644 --- 
a/pkg/fetcher/resource_bytes.go +++ b/pkg/fetcher/resource_bytes.go @@ -2,11 +2,11 @@ package fetcher import ( "bytes" + "context" "errors" "io" "github.com/readium/go-toolkit/pkg/manifest" - "github.com/readium/xmlquery" ) // BytesResource is a Resource serving a lazy-loaded bytes buffer. @@ -35,8 +35,8 @@ func (r *BytesResource) Properties() manifest.Properties { } // Length implements Resource -func (r *BytesResource) Length() (int64, *ResourceError) { - bin, err := r.Read(0, 0) +func (r *BytesResource) Length(ctx context.Context) (int64, *ResourceError) { + bin, err := r.Read(ctx, 0, 0) if err != nil { return 0, err } @@ -44,7 +44,7 @@ func (r *BytesResource) Length() (int64, *ResourceError) { } // Read implements Resource -func (r *BytesResource) Read(start int64, end int64) ([]byte, *ResourceError) { +func (r *BytesResource) Read(ctx context.Context, start int64, end int64) ([]byte, *ResourceError) { if end < start { err := RangeNotSatisfiable(errors.New("end of range smaller than start")) return nil, err @@ -72,7 +72,7 @@ func (r *BytesResource) Read(start int64, end int64) ([]byte, *ResourceError) { } // Stream implements Resource -func (r *BytesResource) Stream(w io.Writer, start int64, end int64) (int64, *ResourceError) { +func (r *BytesResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *ResourceError) { if end < start { err := RangeNotSatisfiable(errors.New("end of range smaller than start")) return -1, err @@ -93,21 +93,6 @@ func (r *BytesResource) Stream(w io.Writer, start int64, end int64) (int64, *Res return n, nil } -// ReadAsString implements Resource -func (r *BytesResource) ReadAsString() (string, *ResourceError) { - return ReadResourceAsString(r) -} - -// ReadAsJSON implements Resource -func (r *BytesResource) ReadAsJSON() (map[string]interface{}, *ResourceError) { - return ReadResourceAsJSON(r) -} - -// ReadAsXML implements Resource -func (r *BytesResource) ReadAsXML(prefixes map[string]string) (*xmlquery.Node, 
*ResourceError) { - return ReadResourceAsXML(r, prefixes) -} - // NewBytesResource creates a new BytesResources from a lazy loader callback. func NewBytesResource(link manifest.Link, loader func() []byte) *BytesResource { return &BytesResource{link: link, loader: loader} diff --git a/pkg/fetcher/traits.go b/pkg/fetcher/traits.go index 4796afc6..ddbb1013 100644 --- a/pkg/fetcher/traits.go +++ b/pkg/fetcher/traits.go @@ -1,6 +1,7 @@ package fetcher import ( + "context" "io" "github.com/readium/go-toolkit/pkg/archive" @@ -8,9 +9,9 @@ import ( type CompressedResource interface { CompressedAs(compressionMethod archive.CompressionMethod) bool - CompressedLength() int64 - StreamCompressed(w io.Writer) (int64, *ResourceError) - StreamCompressedGzip(w io.Writer) (int64, *ResourceError) - ReadCompressed() ([]byte, *ResourceError) - ReadCompressedGzip() ([]byte, *ResourceError) + CompressedLength(ctx context.Context) int64 + StreamCompressed(ctx context.Context, w io.Writer) (int64, *ResourceError) + StreamCompressedGzip(ctx context.Context, w io.Writer) (int64, *ResourceError) + ReadCompressed(ctx context.Context) ([]byte, *ResourceError) + ReadCompressedGzip(ctx context.Context) ([]byte, *ResourceError) } diff --git a/pkg/manifest/href.go b/pkg/manifest/href.go index d3ca7cfa..90c4ab45 100644 --- a/pkg/manifest/href.go +++ b/pkg/manifest/href.go @@ -104,5 +104,8 @@ func (h HREF) String() string { if h.IsTemplated() { return h.template } + if h.href == nil { + return "" + } return h.href.String() } diff --git a/pkg/manifest/link.go b/pkg/manifest/link.go index 011233d3..fec9d395 100644 --- a/pkg/manifest/link.go +++ b/pkg/manifest/link.go @@ -19,6 +19,7 @@ type Link struct { Properties Properties `json:"properties,omitempty"` // Properties associated to the linked resource. Height uint `json:"height,omitempty"` // Height of the linked resource in pixels. Width uint `json:"width,omitempty"` // Width of the linked resource in pixels. 
+ Size uint `json:"size,omitempty"` // Original size of the resource in bytes. Bitrate float64 `json:"bitrate,omitempty"` // Bitrate of the linked resource in kbps. Duration float64 `json:"duration,omitempty"` // Length of the linked resource in seconds. Languages Strings `json:"language,omitempty"` // Expected language of the linked resource (BCP 47 tag). @@ -68,6 +69,7 @@ func LinkFromJSON(rawJson map[string]interface{}) (*Link, error) { Title: parseOptString(rawJson["title"]), Height: float64ToUint(parseOptFloat64(rawJson["height"])), Width: float64ToUint(parseOptFloat64(rawJson["width"])), + Size: float64ToUint(parseOptFloat64(rawJson["size"])), Bitrate: float64Positive(parseOptFloat64(rawJson["bitrate"])), Duration: float64Positive(parseOptFloat64(rawJson["duration"])), } @@ -125,8 +127,8 @@ func LinkFromJSON(rawJson map[string]interface{}) (*Link, error) { return link, nil } -func LinksFromJSONArray(rawJsonArray []interface{}) ([]Link, error) { - links := make([]Link, 0, len(rawJsonArray)) +func LinksFromJSONArray(rawJsonArray []interface{}) (LinkList, error) { + links := make(LinkList, 0, len(rawJsonArray)) for i, entry := range rawJsonArray { entry, ok := entry.(map[string]interface{}) if !ok { @@ -183,6 +185,9 @@ func (l Link) MarshalJSON() ([]byte, error) { if l.Width > 0 { res["width"] = l.Width } + if l.Size > 0 { + res["size"] = l.Size + } if l.Bitrate > 0 { res["bitrate"] = l.Bitrate } @@ -238,7 +243,7 @@ func (ll LinkList) FirstWithRel(rel string) *Link { // Finds all the links with the given relation. func (ll LinkList) FilterByRel(rel string) LinkList { - flinks := make([]Link, 0) + flinks := make(LinkList, 0) for _, link := range ll { for _, r := range link.Rels { if r == rel { @@ -261,7 +266,7 @@ func (ll LinkList) FirstWithMediaType(mt *mediatype.MediaType) *Link { // Finds all the links matching any of the given media types. 
func (ll LinkList) FilterByMediaType(mt ...*mediatype.MediaType) LinkList { - flinks := make([]Link, 0) + flinks := make(LinkList, 0) for _, link := range ll { if link.MediaType.Matches(mt...) { flinks = append(flinks, link) diff --git a/pkg/manifest/link_test.go b/pkg/manifest/link_test.go index b3e044ef..9f136035 100644 --- a/pkg/manifest/link_test.go +++ b/pkg/manifest/link_test.go @@ -166,7 +166,7 @@ func TestLinkUnmarshalJSONArray(t *testing.T) { func TestLinkUnmarshalJSONNilArray(t *testing.T) { ll, err := LinksFromJSONArray(nil) assert.NoError(t, err) - assert.Equal(t, []Link{}, ll) + assert.Equal(t, LinkList{}, ll) } func TestLinkUnmarshalJSONArrayRefusesInvalidLinks(t *testing.T) { diff --git a/pkg/manifest/properties.go b/pkg/manifest/properties.go index 3eee516b..53449976 100644 --- a/pkg/manifest/properties.go +++ b/pkg/manifest/properties.go @@ -129,6 +129,25 @@ func (p Properties) Contains() []string { return cv // Maybe TODO: it's a set } +func (p Properties) Hash() HashList { + if p == nil { + return nil + } + v, ok := p["hash"] + if !ok { + return nil + } + cv, ok := v.([]interface{}) + if !ok { + return nil + } + hashes, err := HashListFromJSONArray(cv) + if err != nil { + return nil + } + return hashes +} + func PropertiesFromJSON(rawJson interface{}) (Properties, error) { if rawJson == nil { return make(Properties), nil diff --git a/pkg/manifest/properties_hash.go b/pkg/manifest/properties_hash.go new file mode 100644 index 00000000..7f5ac06d --- /dev/null +++ b/pkg/manifest/properties_hash.go @@ -0,0 +1,64 @@ +package manifest + +import "github.com/pkg/errors" + +type HashAlgorithm string + +// The following hashes keys are reserved for future use, but not necessarily supported by the toolkit. +// If you are using a hash algorithm not listed here, it's better to use a URI, such as `https://blurha.sh`. +// If there's an algorithm you think should be recognized, let us know. 
+const ( + HashAlgorithmBlake2b HashAlgorithm = "blake2b" + HashAlgorithmBlake2s HashAlgorithm = "blake2s" + HashAlgorithmSHA512 HashAlgorithm = "sha512" + HashAlgorithmSHA256 HashAlgorithm = "sha256" + HashAlgorithmSHA1 HashAlgorithm = "sha1" + HashAlgorithmMD5 HashAlgorithm = "md5" + HashAlgorithmXXH3 HashAlgorithm = "xxh3" + HashAlgorithmCRC32 HashAlgorithm = "crc32" + HashAlgorithmPhashDCT HashAlgorithm = "phash-dct" +) + +type HashValue struct { + Algorithm HashAlgorithm `json:"algorithm"` + Value string `json:"value"` +} + +type HashList []HashValue + +func (h HashList) Value(algorithm HashAlgorithm) (string, bool) { + for _, hash := range h { + if hash.Algorithm == algorithm { + return hash.Value, true + } + } + return "", false +} + +func (h *HashList) Deduplicate() { + seen := make(map[HashAlgorithm]struct{}) + var unique HashList + for _, hash := range *h { + if _, ok := seen[hash.Algorithm]; !ok { + seen[hash.Algorithm] = struct{}{} + unique = append(unique, hash) + } + } + *h = unique +} + +func HashListFromJSONArray(rawJsonArray []interface{}) (HashList, error) { + var hashes HashList + for _, item := range rawJsonArray { + itemMap, ok := item.(map[string]interface{}) + if !ok { + return nil, errors.Errorf("invalid hash item: %v", item) + } + hashValue := HashValue{ + Algorithm: itemMap["algorithm"].(HashAlgorithm), + Value: itemMap["value"].(string), + } + hashes = append(hashes, hashValue) + } + return hashes, nil +} diff --git a/pkg/mediatype/mediatype_of.go b/pkg/mediatype/mediatype_of.go index eb3640c9..220ddd2c 100644 --- a/pkg/mediatype/mediatype_of.go +++ b/pkg/mediatype/mediatype_of.go @@ -1,6 +1,8 @@ package mediatype import ( + "context" + "io/fs" "os" "path/filepath" ) @@ -29,7 +31,7 @@ var Sniffers = []Sniffer{ // sniffers to return a [MediaType] quickly before inspecting the content itself: // - Light Sniffing checks only the provided file extension or media type hints. 
// - Heavy Sniffing reads the bytes to perform more advanced sniffing. -func of(content SnifferContent, mediaTypes []string, fileExtensions []string, sniffers []Sniffer) *MediaType { +func of(ctx context.Context, content SnifferContent, mediaTypes []string, fileExtensions []string, sniffers []Sniffer) *MediaType { // Light sniffing with only media type hints if len(mediaTypes) > 0 { @@ -37,7 +39,7 @@ func of(content SnifferContent, mediaTypes []string, fileExtensions []string, sn mediaTypes: mediaTypes, } for _, sniffer := range sniffers { - mediaType := sniffer(context) + mediaType := sniffer(ctx, context) if mediaType != nil { return mediaType } @@ -51,7 +53,7 @@ func of(content SnifferContent, mediaTypes []string, fileExtensions []string, sn fileExtensions: fileExtensions, } for _, sniffer := range sniffers { - mediaType := sniffer(context) + mediaType := sniffer(ctx, context) if mediaType != nil { return mediaType } @@ -66,7 +68,7 @@ func of(content SnifferContent, mediaTypes []string, fileExtensions []string, sn fileExtensions: fileExtensions, } for _, sniffer := range sniffers { - mediaType := sniffer(context) + mediaType := sniffer(ctx, context) if mediaType != nil { return mediaType } @@ -108,27 +110,36 @@ func of(content SnifferContent, mediaTypes []string, fileExtensions []string, sn // Resolves a format from a list of mediatypes, list of extensions, and list of sniffers func Of(mediaTypes []string, extensions []string, sniffers []Sniffer) *MediaType { - return of(nil, mediaTypes, extensions, sniffers) + return of(context.Background(), nil, mediaTypes, extensions, sniffers) } func OfStringAndExtension(mediaType string, extension string) *MediaType { - return of(nil, []string{mediaType}, []string{extension}, Sniffers) + return of(context.Background(), nil, []string{mediaType}, []string{extension}, Sniffers) } // Resolves a format from a single mediaType string func OfString(mediaType string) *MediaType { - return of(nil, []string{mediaType}, nil, Sniffers) 
+ return of(context.Background(), nil, []string{mediaType}, nil, Sniffers) } // Resolves a format from a single file extension func OfExtension(extension string) *MediaType { - return of(nil, nil, []string{extension}, Sniffers) + return of(context.Background(), nil, nil, []string{extension}, Sniffers) } // Resolves a format from a file -func OfFile(file *os.File, mediaTypes []string, extensions []string, sniffers []Sniffer) *MediaType { +func OfFile(ctx context.Context, file fs.File, mediaTypes []string, extensions []string, sniffers []Sniffer) *MediaType { if file != nil { - ext := filepath.Ext(file.Name()) + var ext string + if of, ok := file.(*os.File); ok { + ext = filepath.Ext(of.Name()) + } else { + stat, err := file.Stat() + if err == nil { + ext = filepath.Ext(stat.Name()) + } + } + if ext != "" { ext = ext[1:] // Remove the leading "." if extensions == nil { @@ -139,20 +150,20 @@ func OfFile(file *os.File, mediaTypes []string, extensions []string, sniffers [] } } - return of(NewSnifferFileContent(file), mediaTypes, extensions, sniffers) + return of(ctx, NewSnifferFileContent(file), mediaTypes, extensions, sniffers) } // Resolves a format from a file, and nothing else -func OfFileOnly(file *os.File) *MediaType { - return OfFile(file, nil, nil, Sniffers) +func OfFileOnly(ctx context.Context, file fs.File) *MediaType { + return OfFile(ctx, file, nil, nil, Sniffers) } // Resolves a format from bytes, e.g. from an HTTP response. -func OfBytes(bytes []byte, mediaTypes []string, extensions []string, sniffers []Sniffer) *MediaType { - return of(NewSnifferBytesContent(bytes), mediaTypes, extensions, sniffers) +func OfBytes(ctx context.Context, bytes []byte, mediaTypes []string, extensions []string, sniffers []Sniffer) *MediaType { + return of(ctx, NewSnifferBytesContent(bytes), mediaTypes, extensions, sniffers) } // Resolves a format from bytes, e.g. 
from an HTTP response, and nothing else -func OfBytesOnly(bytes []byte) *MediaType { - return of(NewSnifferBytesContent(bytes), nil, nil, Sniffers) +func OfBytesOnly(ctx context.Context, bytes []byte) *MediaType { + return of(ctx, NewSnifferBytesContent(bytes), nil, nil, Sniffers) } diff --git a/pkg/mediatype/sniffer.go b/pkg/mediatype/sniffer.go index 4019bd20..ffabe5b8 100644 --- a/pkg/mediatype/sniffer.go +++ b/pkg/mediatype/sniffer.go @@ -1,6 +1,7 @@ package mediatype import ( + "context" "encoding/json" "mime" "path/filepath" @@ -9,11 +10,11 @@ import ( "github.com/readium/go-toolkit/pkg/internal/extensions" ) -type Sniffer func(context SnifferContext) *MediaType +type Sniffer func(ctx context.Context, context SnifferContext) *MediaType // Sniffs an XHTML document. // Must precede the HTML sniffer. -func SniffXHTML(context SnifferContext) *MediaType { +func SniffXHTML(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("xht", "xhtml") || context.HasMediaType("application/xhtml+xml") { return &XHTML } @@ -28,7 +29,7 @@ func SniffXHTML(context SnifferContext) *MediaType { } // Sniffs an HTML document. -func SniffHTML(context SnifferContext) *MediaType { +func SniffHTML(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("htm", "html") || context.HasMediaType("text/html") { return &HTML } @@ -55,7 +56,7 @@ func SniffHTML(context SnifferContext) *MediaType { } // Sniffs an OPDS document. -func SniffOPDS(context SnifferContext) *MediaType { +func SniffOPDS(ctx context.Context, context SnifferContext) *MediaType { // OPDS 1 (Light) if context.HasMediaType("application/atom+xml;type=entry;profile=opds-catalog") { return &OPDS1Entry @@ -100,7 +101,7 @@ func SniffOPDS(context SnifferContext) *MediaType { } // Sniffs an LCP License Document. 
-func SniffLCPLicense(context SnifferContext) *MediaType { +func SniffLCPLicense(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("lcpl") || context.HasMediaType("application/vnd.readium.lcp.license.v1.0+json") { return &LCPLicenseDocument } @@ -112,7 +113,7 @@ func SniffLCPLicense(context SnifferContext) *MediaType { } // Sniffs a bitmap image. -func SniffBitmap(context SnifferContext) *MediaType { +func SniffBitmap(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("avif") || context.HasMediaType("image/avif") { return &AVIF } @@ -144,7 +145,7 @@ func SniffBitmap(context SnifferContext) *MediaType { } // Sniffs audio files. -func SniffAudio(context SnifferContext) *MediaType { +func SniffAudio(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("aac") || context.HasMediaType("audio/aac") { return &AAC } @@ -175,7 +176,7 @@ func SniffAudio(context SnifferContext) *MediaType { } // Sniffs a Readium Web Publication, protected or not by LCP. -func SniffWebpub(context SnifferContext) *MediaType { +func SniffWebpub(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("audiobook") || context.HasMediaType("application/audiobook+zip") { return &ReadiumAudiobook } @@ -212,7 +213,7 @@ func SniffWebpub(context SnifferContext) *MediaType { } // Sniffs a W3C Web Publication Manifest. -func SniffW3CWPUB(context SnifferContext) *MediaType { +func SniffW3CWPUB(ctx context.Context, context SnifferContext) *MediaType { if js := context.ContentAsJSON(); js != nil { if ctx, ok := js["@context"]; ok { if context, ok := ctx.([]interface{}); ok { @@ -232,12 +233,12 @@ func SniffW3CWPUB(context SnifferContext) *MediaType { // Sniffs an EPUB publication. 
// Reference: https://www.w3.org/publishing/epub3/epub-ocf.html#sec-zip-container-mime -func SniffEPUB(context SnifferContext) *MediaType { +func SniffEPUB(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("epub") || context.HasMediaType("application/epub+zip") { return &EPUB } - if mimetype := context.ReadArchiveEntryAt("mimetype"); mimetype != nil { + if mimetype := context.ReadArchiveEntryAt(ctx, "mimetype"); mimetype != nil { if strings.TrimSpace(string(mimetype)) == "application/epub+zip" { return &EPUB } @@ -250,15 +251,15 @@ func SniffEPUB(context SnifferContext) *MediaType { // References: // - https://www.w3.org/TR/lpf/ // - https://www.w3.org/TR/pub-manifest/ -func SniffLPF(context SnifferContext) *MediaType { +func SniffLPF(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("lpf") || context.HasMediaType("application/lpf+zip") { return &LPF } - if context.ContainsArchiveEntryAt("index.html") { + if context.ContainsArchiveEntryAt(ctx, "index.html") { return &LPF } - if entry := context.ReadArchiveEntryAt("publication.json"); entry != nil { + if entry := context.ReadArchiveEntryAt(ctx, "publication.json"); entry != nil { var js map[string]interface{} if err := json.Unmarshal(entry, &js); err == nil && js != nil { if ctx, ok := js["@context"]; ok { @@ -297,7 +298,7 @@ var zab_extensions = map[string]struct{}{ // Sniffs a simple Archive-based format, like Comic Book Archive or Zipped Audio Book. 
// Reference: https://wiki.mobileread.com/wiki/CBR_and_CBZ -func SniffArchive(context SnifferContext) *MediaType { +func SniffArchive(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("cbz") || context.HasMediaType("application/vnd.comicbook+zip", "application/x-cbz", "application/x-cbr") { return &CBZ } @@ -305,7 +306,7 @@ func SniffArchive(context SnifferContext) *MediaType { return &ZAB } - if archive, err := context.ContentAsArchive(); err == nil && archive != nil { + if archive, err := context.ContentAsArchive(ctx); err == nil && archive != nil { archiveContainsOnlyExtensions := func(exts map[string]struct{}) bool { for _, zf := range archive.Entries() { if extensions.IsHiddenOrThumbs(zf.Path()) { @@ -337,7 +338,7 @@ func SniffArchive(context SnifferContext) *MediaType { // Sniffs a PDF document. // Reference: https://www.loc.gov/preservation/digital/formats/fdd/fdd000123.shtml -func SniffPDF(context SnifferContext) *MediaType { +func SniffPDF(ctx context.Context, context SnifferContext) *MediaType { if context.HasFileExtension("pdf") || context.HasMediaType("application/pdf") { return &PDF } diff --git a/pkg/mediatype/sniffer_content.go b/pkg/mediatype/sniffer_content.go index e72782cb..166eb6f3 100644 --- a/pkg/mediatype/sniffer_content.go +++ b/pkg/mediatype/sniffer_content.go @@ -4,6 +4,7 @@ import ( "bufio" "bytes" "io" + "io/fs" "os" ) @@ -14,10 +15,12 @@ type SnifferContent interface { // Used to sniff a local file. 
type SnifferFileContent struct { - file *os.File + file fs.File + name *string + buffer []byte } -func NewSnifferFileContent(file *os.File) SnifferFileContent { +func NewSnifferFileContent(file fs.File) SnifferFileContent { return SnifferFileContent{file: file} } @@ -25,7 +28,6 @@ const MaxReadSize = 5 * 1024 * 1024 // 5MB // Read implements SnifferContent func (s SnifferFileContent) Read() []byte { - s.file.Seek(0, io.SeekStart) info, err := s.file.Stat() if err != nil { return nil @@ -33,18 +35,58 @@ func (s SnifferFileContent) Read() []byte { if info.Size() > MaxReadSize { return nil } - data := make([]byte, info.Size()) - _, err = s.file.Read(data) - if err != nil && err != io.EOF { - return nil + + if of, ok := s.file.(io.ReadSeeker); ok { + of.Seek(0, io.SeekStart) + data := make([]byte, info.Size()) + _, err = s.file.Read(data) + if err != nil && err != io.EOF { + return nil + } + return data + } else { + if s.buffer == nil { + s.buffer = make([]byte, info.Size()) + _, err = s.file.Read(s.buffer) + if err != nil && err != io.EOF { + return nil + } + } + return s.buffer } - return data } // Stream implements SnifferContent func (s SnifferFileContent) Stream() io.Reader { - s.file.Seek(0, io.SeekStart) - return bufio.NewReader(s.file) + if of, ok := s.file.(*os.File); ok { + of.Seek(0, io.SeekStart) + return bufio.NewReader(s.file) + } else { + if r := s.Read(); r != nil { + return bytes.NewReader(r) + } + return nil + } +} + +func (s *SnifferFileContent) Name() string { + if s.name != nil { + return *s.name + } + + if of, ok := s.file.(*os.File); ok { + name := of.Name() + s.name = &name + return name + } else { + info, err := s.file.Stat() + if err != nil { + return "" + } + name := info.Name() + s.name = &name + return name + } } // Used to sniff a byte array. 
diff --git a/pkg/mediatype/sniffer_context.go b/pkg/mediatype/sniffer_context.go index 53da2d15..a15729ae 100644 --- a/pkg/mediatype/sniffer_context.go +++ b/pkg/mediatype/sniffer_context.go @@ -1,6 +1,7 @@ package mediatype import ( + "context" "encoding/json" "encoding/xml" "errors" @@ -8,6 +9,7 @@ import ( "strings" "github.com/readium/go-toolkit/pkg/archive" + "github.com/readium/go-toolkit/pkg/util/url" "golang.org/x/text/encoding" ) @@ -156,14 +158,18 @@ func (s SnifferContext) ContentAsXML() *XMLNode { // Content as an Archive instance. // Warning: Archive is only supported for a local file, for now. -func (s *SnifferContext) ContentAsArchive() (archive.Archive, error) { +func (s *SnifferContext) ContentAsArchive(ctx context.Context) (archive.Archive, error) { if !s._loadedContentAsArchive { s._loadedContentAsArchive = true switch s.content.(type) { case SnifferFileContent: { fileSniffer := s.content.(SnifferFileContent) - a, err := archive.NewArchiveFactory().Open(fileSniffer.file.Name(), "") + u, err := url.FromFilepath(fileSniffer.Name()) + if err != nil { + return nil, err + } + a, err := archive.NewArchiveFactory().Open(ctx, u, "") if err != nil { return nil, err } @@ -172,7 +178,7 @@ func (s *SnifferContext) ContentAsArchive() (archive.Archive, error) { case SnifferBytesContent: { fileSniffer := s.content.(SnifferBytesContent) - a, err := archive.NewArchiveFactory().OpenBytes(fileSniffer.bytes, "") + a, err := archive.NewArchiveFactory().OpenBytes(ctx, fileSniffer.bytes, "") if err != nil { return nil, err } @@ -273,8 +279,8 @@ func (s SnifferContext) ContainsJSONKeys(keys ...string) bool { } // Returns whether an Archive entry exists in this file. 
-func (s SnifferContext) ContainsArchiveEntryAt(path string) bool { - a, err := s.ContentAsArchive() +func (s SnifferContext) ContainsArchiveEntryAt(ctx context.Context, path string) bool { + a, err := s.ContentAsArchive(ctx) if err != nil { return false } @@ -286,8 +292,8 @@ func (s SnifferContext) ContainsArchiveEntryAt(path string) bool { } // Returns the Archive entry data at the given [path] in this file. -func (s SnifferContext) ReadArchiveEntryAt(path string) []byte { - a, err := s.ContentAsArchive() +func (s SnifferContext) ReadArchiveEntryAt(ctx context.Context, path string) []byte { + a, err := s.ContentAsArchive(ctx) if err != nil { return nil } diff --git a/pkg/mediatype/sniffer_test.go b/pkg/mediatype/sniffer_test.go index 0aaff8f3..fed3f950 100644 --- a/pkg/mediatype/sniffer_test.go +++ b/pkg/mediatype/sniffer_test.go @@ -1,7 +1,7 @@ package mediatype import ( - "io/ioutil" + "io" "mime" "os" "path/filepath" @@ -45,7 +45,7 @@ func TestSnifferFromFile(t *testing.T) { func TestSnifferFromBytes(t *testing.T) { testAudiobook, err := os.Open(filepath.Join("testdata", "audiobook.json")) assert.NoError(t, err) - testAudiobookBytes, err := ioutil.ReadAll(testAudiobook) + testAudiobookBytes, err := io.ReadAll(testAudiobook) testAudiobook.Close() assert.NoError(t, err) assert.Equal(t, &ReadiumAudiobookManifest, MediaTypeOfBytesOnly(testAudiobookBytes)) @@ -56,23 +56,23 @@ func TestSnifferFromFile(t *testing.T) { testCbz, err := os.Open(filepath.Join("testdata", "cbz.unknown")) assert.NoError(t, err) defer testCbz.Close() - assert.Equal(t, &CBZ, OfFileOnly(testCbz), "test CBZ should be identified by heavy Sniffer") + assert.Equal(t, &CBZ, OfFileOnly(t.Context(), testCbz), "test CBZ should be identified by heavy Sniffer") } func TestSnifferFromBytes(t *testing.T) { testCbz, err := os.Open(filepath.Join("testdata", "cbz.unknown")) assert.NoError(t, err) - testCbzBytes, err := ioutil.ReadAll(testCbz) + testCbzBytes, err := io.ReadAll(testCbz) testCbz.Close() 
assert.NoError(t, err) - assert.Equal(t, &CBZ, OfBytesOnly(testCbzBytes), "test CBZ's bytes should be identified by heavy Sniffer") + assert.Equal(t, &CBZ, OfBytesOnly(t.Context(), testCbzBytes), "test CBZ's bytes should be identified by heavy Sniffer") } func TestSnifferUnknownFormat(t *testing.T) { assert.Nil(t, OfString("invalid"), "\"invalid\" MediaType should be unsniffable") unknownFile, err := os.Open(filepath.Join("testdata", "unknown")) assert.NoError(t, err) - assert.Nil(t, OfFileOnly(unknownFile), "MediaType of unknown file should be unsniffable") + assert.Nil(t, OfFileOnly(t.Context(), unknownFile), "MediaType of unknown file should be unsniffable") } func TestSnifferValidMediaTypeFallback(t *testing.T) { @@ -120,7 +120,7 @@ func TestSniffCBZ(t *testing.T) { testCbz, err := os.Open(filepath.Join("testdata", "cbz.unknown")) assert.NoError(t, err) defer testCbz.Close() - assert.Equal(t, &CBZ, OfFileOnly(testCbz)) + assert.Equal(t, &CBZ, OfFileOnly(t.Context(), testCbz)) } func TestSniffDiViNa(t *testing.T) { @@ -143,7 +143,7 @@ func TestSniffEPUB(t *testing.T) { testEpub, err := os.Open(filepath.Join("testdata", "epub.unknown")) assert.NoError(t, err) defer testEpub.Close() - assert.Equal(t, &EPUB, OfFileOnly(testEpub)) + assert.Equal(t, &EPUB, OfFileOnly(t.Context(), testEpub)) } func TestSniffGIF(t *testing.T) { @@ -159,7 +159,7 @@ func TestSniffHTML(t *testing.T) { testHtml, err := os.Open(filepath.Join("testdata", "html.unknown")) assert.NoError(t, err) defer testHtml.Close() - assert.Equal(t, &HTML, OfFileOnly(testHtml)) + assert.Equal(t, &HTML, OfFileOnly(t.Context(), testHtml)) } func TestSniffXHTML(t *testing.T) { @@ -170,7 +170,7 @@ func TestSniffXHTML(t *testing.T) { testXHtml, err := os.Open(filepath.Join("testdata", "xhtml.unknown")) assert.NoError(t, err) defer testXHtml.Close() - assert.Equal(t, &XHTML, OfFileOnly(testXHtml)) + assert.Equal(t, &XHTML, OfFileOnly(t.Context(), testXHtml)) } func TestSniffJPEG(t *testing.T) { @@ -194,7 +194,7 
@@ func TestSniffOPDS1Feed(t *testing.T) { testOPDS1Feed, err := os.Open(filepath.Join("testdata", "opds1-feed.unknown")) assert.NoError(t, err) defer testOPDS1Feed.Close() - assert.Equal(t, &OPDS1, OfFileOnly(testOPDS1Feed)) + assert.Equal(t, &OPDS1, OfFileOnly(t.Context(), testOPDS1Feed)) } func TestSniffOPDS1Entry(t *testing.T) { @@ -203,7 +203,7 @@ func TestSniffOPDS1Entry(t *testing.T) { testOPDS1Entry, err := os.Open(filepath.Join("testdata", "opds1-entry.unknown")) assert.NoError(t, err) defer testOPDS1Entry.Close() - assert.Equal(t, &OPDS1Entry, OfFileOnly(testOPDS1Entry)) + assert.Equal(t, &OPDS1Entry, OfFileOnly(t.Context(), testOPDS1Entry)) } func TestSniffOPDS2Feed(t *testing.T) { @@ -276,7 +276,7 @@ func TestSniffLCPLicenseDocument(t *testing.T) { testLCPLicenseDoc, err := os.Open(filepath.Join("testdata", "lcpl.unknown")) assert.NoError(t, err) defer testLCPLicenseDoc.Close() - assert.Equal(t, &LCPLicenseDocument, OfFileOnly(testLCPLicenseDoc)) + assert.Equal(t, &LCPLicenseDocument, OfFileOnly(t.Context(), testLCPLicenseDoc)) } func TestSniffLPF(t *testing.T) { @@ -286,12 +286,12 @@ func TestSniffLPF(t *testing.T) { testLPF1, err := os.Open(filepath.Join("testdata", "lpf.unknown")) assert.NoError(t, err) defer testLPF1.Close() - assert.Equal(t, &LPF, OfFileOnly(testLPF1)) + assert.Equal(t, &LPF, OfFileOnly(t.Context(), testLPF1)) testLPF2, err := os.Open(filepath.Join("testdata", "lpf-index-html.unknown")) assert.NoError(t, err) defer testLPF2.Close() - assert.Equal(t, &LPF, OfFileOnly(testLPF2)) + assert.Equal(t, &LPF, OfFileOnly(t.Context(), testLPF2)) } func TestSniffPDF(t *testing.T) { @@ -301,7 +301,7 @@ func TestSniffPDF(t *testing.T) { testPDF, err := os.Open(filepath.Join("testdata", "pdf.unknown")) assert.NoError(t, err) defer testPDF.Close() - assert.Equal(t, &PDF, OfFileOnly(testPDF)) + assert.Equal(t, &PDF, OfFileOnly(t.Context(), testPDF)) } func TestSniffPNG(t *testing.T) { @@ -340,7 +340,7 @@ func TestSniffW3CWPUBManifest(t *testing.T) 
{ testW3CWPUB, err := os.Open(filepath.Join("testdata", "w3c-wpub.json")) assert.NoError(t, err) defer testW3CWPUB.Close() - assert.Equal(t, &W3CWPUBManifest, OfFileOnly(testW3CWPUB)) + assert.Equal(t, &W3CWPUBManifest, OfFileOnly(t.Context(), testW3CWPUB)) } func TestSniffZAB(t *testing.T) { @@ -349,7 +349,7 @@ func TestSniffZAB(t *testing.T) { testZAB, err := os.Open(filepath.Join("testdata", "zab.unknown")) assert.NoError(t, err) defer testZAB.Close() - assert.Equal(t, &ZAB, OfFileOnly(testZAB)) + assert.Equal(t, &ZAB, OfFileOnly(t.Context(), testZAB)) } func TestSniffJSON(t *testing.T) { @@ -359,7 +359,7 @@ func TestSniffJSON(t *testing.T) { testJSON, err := os.Open(filepath.Join("testdata", "any.json")) assert.NoError(t, err) defer testJSON.Close() - assert.Equal(t, &JSON, OfFileOnly(testJSON)) + assert.Equal(t, &JSON, OfFileOnly(t.Context(), testJSON)) } func TestSniffSystemMediaTypes(t *testing.T) { diff --git a/pkg/parser/epub/clock_value.go b/pkg/parser/epub/clock_value.go index 94250226..5bbdf7b2 100644 --- a/pkg/parser/epub/clock_value.go +++ b/pkg/parser/epub/clock_value.go @@ -62,9 +62,7 @@ func parseTimecount(value float64, metric string) *float64 { case "min": value *= 60 return &value - case "s": - fallthrough - case "": + case "s", "": return &value case "ms": value /= 1000 diff --git a/pkg/parser/epub/deobfuscator.go b/pkg/parser/epub/deobfuscator.go index fe766b29..26bba9c1 100644 --- a/pkg/parser/epub/deobfuscator.go +++ b/pkg/parser/epub/deobfuscator.go @@ -1,6 +1,7 @@ package epub import ( + "context" "crypto/sha1" "encoding/hex" "io" @@ -47,10 +48,10 @@ func (d DeobfuscatingResource) obfuscation() (string, int64) { return algorithm, v } -func (d DeobfuscatingResource) Read(start, end int64) ([]byte, *fetcher.ResourceError) { +func (d DeobfuscatingResource) Read(ctx context.Context, start, end int64) ([]byte, *fetcher.ResourceError) { algorithm, v := d.obfuscation() if v > 0 { - data, err := d.ProxyResource.Read(start, end) + data, err := 
d.ProxyResource.Read(ctx, start, end) if err != nil { return nil, err } @@ -67,15 +68,15 @@ func (d DeobfuscatingResource) Read(start, end int64) ([]byte, *fetcher.Resource } // Algorithm not in known, so skip deobfuscation - return d.ProxyResource.Read(start, end) + return d.ProxyResource.Read(ctx, start, end) } -func (d DeobfuscatingResource) Stream(w io.Writer, start int64, end int64) (int64, *fetcher.ResourceError) { +func (d DeobfuscatingResource) Stream(ctx context.Context, w io.Writer, start int64, end int64) (int64, *fetcher.ResourceError) { algorithm, v := d.obfuscation() if v > 0 { if start >= v { // We're past the obfuscated part, just proxy it - return d.ProxyResource.Stream(w, start, end) + return d.ProxyResource.Stream(ctx, w, start, end) } // Create a pipe to proxy the stream for deobfuscation @@ -83,7 +84,7 @@ func (d DeobfuscatingResource) Stream(w io.Writer, start int64, end int64) (int6 // Start piping the resource's stream in a goroutine go func() { - _, err := d.ProxyResource.Stream(pw, start, end) + _, err := d.ProxyResource.Stream(ctx, pw, start, end) if err != nil { pw.CloseWithError(err) } else { @@ -147,7 +148,7 @@ func (d DeobfuscatingResource) Stream(w io.Writer, start int64, end int64) (int6 } // Algorithm not in known, so skip deobfuscation - return d.ProxyResource.Stream(w, start, end) + return d.ProxyResource.Stream(ctx, w, start, end) } // CompressedAs implements CompressedResource @@ -161,53 +162,53 @@ func (d DeobfuscatingResource) CompressedAs(compressionMethod archive.Compressio } // CompressedLength implements CompressedResource -func (d DeobfuscatingResource) CompressedLength() int64 { +func (d DeobfuscatingResource) CompressedLength(ctx context.Context) int64 { _, v := d.obfuscation() if v > 0 { return -1 } - return d.ProxyResource.CompressedLength() + return d.ProxyResource.CompressedLength(ctx) } // StreamCompressed implements CompressedResource -func (d DeobfuscatingResource) StreamCompressed(w io.Writer) (int64, 
*fetcher.ResourceError) { +func (d DeobfuscatingResource) StreamCompressed(ctx context.Context, w io.Writer) (int64, *fetcher.ResourceError) { _, v := d.obfuscation() if v > 0 { return 0, fetcher.Other(errors.New("cannot stream compressed resource when obfuscated")) } - return d.ProxyResource.StreamCompressed(w) + return d.ProxyResource.StreamCompressed(ctx, w) } // StreamCompressedGzip implements CompressedResource -func (d DeobfuscatingResource) StreamCompressedGzip(w io.Writer) (int64, *fetcher.ResourceError) { +func (d DeobfuscatingResource) StreamCompressedGzip(ctx context.Context, w io.Writer) (int64, *fetcher.ResourceError) { _, v := d.obfuscation() if v > 0 { return 0, fetcher.Other(errors.New("cannot stream compressed resource when obfuscated")) } - return d.ProxyResource.StreamCompressedGzip(w) + return d.ProxyResource.StreamCompressedGzip(ctx, w) } // ReadCompressed implements CompressedResource -func (d DeobfuscatingResource) ReadCompressed() ([]byte, *fetcher.ResourceError) { +func (d DeobfuscatingResource) ReadCompressed(ctx context.Context) ([]byte, *fetcher.ResourceError) { _, v := d.obfuscation() if v > 0 { return nil, fetcher.Other(errors.New("cannot read compressed resource when obfuscated")) } - return d.ProxyResource.ReadCompressed() + return d.ProxyResource.ReadCompressed(ctx) } // ReadCompressedGzip implements CompressedResource -func (d DeobfuscatingResource) ReadCompressedGzip() ([]byte, *fetcher.ResourceError) { +func (d DeobfuscatingResource) ReadCompressedGzip(ctx context.Context) ([]byte, *fetcher.ResourceError) { _, v := d.obfuscation() if v > 0 { return nil, fetcher.Other(errors.New("cannot read compressed resource when obfuscated")) } - return d.ProxyResource.ReadCompressedGzip() + return d.ProxyResource.ReadCompressedGzip(ctx) } func (d DeobfuscatingResource) getHashKeyAdobe() []byte { diff --git a/pkg/parser/epub/deobfuscator_test.go b/pkg/parser/epub/deobfuscator_test.go index ddbd8579..21c7ce43 100644 --- 
a/pkg/parser/epub/deobfuscator_test.go +++ b/pkg/parser/epub/deobfuscator_test.go @@ -16,7 +16,7 @@ func withDeobfuscator(t *testing.T, href string, algorithm string, start, end in t.Log(href) // Cleartext font - clean, err := ft.Get(manifest.Link{Href: manifest.MustNewHREFFromString("deobfuscation/cut-cut.woff", false)}).Read(start, end) + clean, err := ft.Get(t.Context(), manifest.Link{Href: manifest.MustNewHREFFromString("deobfuscation/cut-cut.woff", false)}).Read(t.Context(), start, end) if !assert.Nil(t, err) { assert.NoError(t, err.Cause) f(nil, nil) @@ -34,7 +34,7 @@ func withDeobfuscator(t *testing.T, href string, algorithm string, start, end in }, } } - obfu, err := NewDeobfuscator(identifier).Transform(ft.Get(link)).Read(start, end) + obfu, err := NewDeobfuscator(identifier).Transform(ft.Get(t.Context(), link)).Read(t.Context(), start, end) if !assert.Nil(t, err) { assert.NoError(t, err.Cause) f(nil, nil) @@ -43,7 +43,7 @@ func withDeobfuscator(t *testing.T, href string, algorithm string, start, end in f(clean, obfu) bbuff := new(bytes.Buffer) - _, err = NewDeobfuscator(identifier).Transform(ft.Get(link)).Stream(bbuff, start, end) + _, err = NewDeobfuscator(identifier).Transform(ft.Get(t.Context(), link)).Stream(t.Context(), bbuff, start, end) if !assert.Nil(t, err) { assert.NoError(t, err.Cause) f(nil, nil) diff --git a/pkg/parser/epub/media_overlay_service.go b/pkg/parser/epub/media_overlay_service.go index 067e08d7..f5083ee2 100644 --- a/pkg/parser/epub/media_overlay_service.go +++ b/pkg/parser/epub/media_overlay_service.go @@ -1,6 +1,7 @@ package epub import ( + "context" "slices" "github.com/readium/go-toolkit/pkg/fetcher" @@ -73,13 +74,13 @@ func (s *MediaOverlayService) HasGuideForResource(href string) bool { return ok } -func (s *MediaOverlayService) GuideForResource(href string) (*manifest.GuidedNavigationDocument, error) { +func (s *MediaOverlayService) GuideForResource(ctx context.Context, href string) (*manifest.GuidedNavigationDocument, 
error) { // Check if the provided resource has a guided navigation document if link, ok := s.originalSmilAlternates[href]; ok { - res := s.fetcher.Get(link) + res := s.fetcher.Get(ctx, link) defer res.Close() - n, rerr := res.ReadAsXML(map[string]string{ + n, rerr := fetcher.ReadResourceAsXML(ctx, res, map[string]string{ NamespaceOPS: "epub", NamespaceSMIL: "smil", NamespaceSMIL2: "smil2", @@ -118,6 +119,6 @@ func (s *MediaOverlayService) GuideForResource(href string) (*manifest.GuidedNav return nil, nil } -func (s *MediaOverlayService) Get(link manifest.Link) (fetcher.Resource, bool) { - return pub.GetForGuidedNavigationService(s, link) +func (s *MediaOverlayService) Get(ctx context.Context, link manifest.Link) (fetcher.Resource, bool) { + return pub.GetForGuidedNavigationService(ctx, s, link) } diff --git a/pkg/parser/epub/metadata.go b/pkg/parser/epub/metadata.go index ec32da28..968373db 100644 --- a/pkg/parser/epub/metadata.go +++ b/pkg/parser/epub/metadata.go @@ -240,11 +240,7 @@ func (m MetadataParser) parseDcElement(element *xmlquery.Node) *MetadataItem { data := strings.ToLower(element.Data) propName := VocabularyDCTerms + data switch data { - case "creator": - fallthrough - case "contributor": - fallthrough - case "publisher": + case "creator", "contributor", "publisher": c := m.contributorWithLegacyAttr(element, propName, propValue) return &c case "date": @@ -1028,9 +1024,7 @@ func (m *PubMetadataAdapter) Presentation() manifest.Presentation { spread = manifest.SpreadNone case "landscape": spread = manifest.SpreadLandscape - case "portrait": - fallthrough - case "both": + case "portrait", "both": spread = manifest.SpreadBoth } m._presentation.Spread = &spread diff --git a/pkg/parser/epub/metadata_test.go b/pkg/parser/epub/metadata_test.go index 4eb95f42..4765cf7a 100644 --- a/pkg/parser/epub/metadata_test.go +++ b/pkg/parser/epub/metadata_test.go @@ -1,6 +1,7 @@ package epub import ( + "context" "testing" "time" @@ -11,8 +12,8 @@ import ( 
"github.com/stretchr/testify/assert" ) -func loadMetadata(name string) (*manifest.Metadata, error) { - n, rerr := fetcher.NewFileResource(manifest.Link{}, "./testdata/package/"+name+".opf").ReadAsXML(map[string]string{ +func loadMetadata(ctx context.Context, name string) (*manifest.Metadata, error) { + n, rerr := fetcher.ReadResourceAsXML(ctx, fetcher.NewFileResource(manifest.Link{}, "./testdata/package/"+name+".opf"), map[string]string{ NamespaceOPF: "opf", NamespaceDC: "dc", VocabularyDCTerms: "dcterms", @@ -45,9 +46,9 @@ func loadMetadata(name string) (*manifest.Metadata, error) { } func TestMetadataContributorDCCreatorDefaultsToAuthor(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -58,9 +59,9 @@ func TestMetadataContributorDCCreatorDefaultsToAuthor(t *testing.T) { } func TestMetadataContributorDCPublisherIsPublisher(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -71,9 +72,9 @@ func TestMetadataContributorDCPublisherIsPublisher(t *testing.T) { } func TestMetadataContributorDCContributorDefaultsToContributor(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -84,9 +85,9 @@ func TestMetadataContributorDCContributorDefaultsToContributor(t *testing.T) { } func 
TestMetadataContributorUnknownRolesIgnored(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -98,9 +99,9 @@ func TestMetadataContributorUnknownRolesIgnored(t *testing.T) { } func TestMetadataContributorFileAsParsed(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) lsa := manifest.NewLocalizedStringFromString("Sorting Key") @@ -114,7 +115,7 @@ func TestMetadataContributorFileAsParsed(t *testing.T) { func TestMetadataContributorLocalizedParsed(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) assert.Contains(t, m3.Contributors, manifest.Contributor{ @@ -127,7 +128,7 @@ func TestMetadataContributorLocalizedParsed(t *testing.T) { func TestMetadataContributorOnlyFirstRoleConsidered(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -140,7 +141,7 @@ func TestMetadataContributorOnlyFirstRoleConsidered(t *testing.T) { func TestMetadataContributorMediaOverlaysNarrator(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) assert.Contains(t, m3.Narrators, manifest.Contributor{ @@ -149,9 +150,9 @@ func TestMetadataContributorMediaOverlaysNarrator(t *testing.T) { } func TestMetadataContributorAuthor(t *testing.T) { - m2, 
err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -163,9 +164,9 @@ func TestMetadataContributorAuthor(t *testing.T) { } func TestMetadataContributorPublisher(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -177,9 +178,9 @@ func TestMetadataContributorPublisher(t *testing.T) { } func TestMetadataContributorTranslator(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -191,9 +192,9 @@ func TestMetadataContributorTranslator(t *testing.T) { } func TestMetadataContributorArtist(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -205,9 +206,9 @@ func TestMetadataContributorArtist(t *testing.T) { } func TestMetadataContributorIllustrator(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := 
manifest.Contributor{ @@ -219,9 +220,9 @@ func TestMetadataContributorIllustrator(t *testing.T) { } func TestMetadataContributorColorist(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -233,9 +234,9 @@ func TestMetadataContributorColorist(t *testing.T) { } func TestMetadataContributorNarrator(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) contributor := manifest.Contributor{ @@ -247,9 +248,9 @@ func TestMetadataContributorNarrator(t *testing.T) { } func TestMetadataContributorsNoMoreThanNeeded(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) assert.Len(t, m2.Authors, 2) @@ -274,9 +275,9 @@ func TestMetadataContributorsNoMoreThanNeeded(t *testing.T) { } func TestMetadataTitleParsed(t *testing.T) { - m2, err := loadMetadata("titles-epub2") + m2, err := loadMetadata(t.Context(), "titles-epub2") assert.NoError(t, err) - m3, err := loadMetadata("titles-epub3") + m3, err := loadMetadata(t.Context(), "titles-epub3") assert.NoError(t, err) assert.Equal(t, manifest.NewLocalizedStringFromStrings(map[string]string{ @@ -290,7 +291,7 @@ func TestMetadataTitleParsed(t *testing.T) { func TestMetadataTitleSubtitleParsed(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("titles-epub3") + m3, err := loadMetadata(t.Context(), "titles-epub3") 
assert.NoError(t, err) assert.Equal(t, manifest.NewLocalizedStringFromStrings(map[string]string{ @@ -300,13 +301,13 @@ func TestMetadataTitleSubtitleParsed(t *testing.T) { } func TestMetadataNoAccessibility(t *testing.T) { - m, err := loadMetadata("version-default") + m, err := loadMetadata(t.Context(), "version-default") assert.NoError(t, err) assert.Nil(t, m.Accessibility) } func TestMetadataEPUB2Accessibility(t *testing.T) { - m, err := loadMetadata("accessibility-epub2") + m, err := loadMetadata(t.Context(), "accessibility-epub2") assert.NoError(t, err) e := manifest.NewA11y() e.ConformsTo = []manifest.A11yProfile{manifest.EPUBA11y11WCAG21AA, manifest.EPUBA11y11WCAG20AAA, manifest.EPUBA11y10WCAG20A} @@ -329,7 +330,7 @@ func TestMetadataEPUB2Accessibility(t *testing.T) { } func TestMetadataEPUB2TDM(t *testing.T) { - m, err := loadMetadata("tdm-epub2") + m, err := loadMetadata(t.Context(), "tdm-epub2") assert.NoError(t, err) assert.Equal(t, &manifest.TDM{ Policy: "https://provider.com/policies/policy.json", @@ -338,7 +339,7 @@ func TestMetadataEPUB2TDM(t *testing.T) { } func TestMetadataEPUB3Accessibility(t *testing.T) { - m, err := loadMetadata("accessibility-epub3") + m, err := loadMetadata(t.Context(), "accessibility-epub3") assert.NoError(t, err) e := manifest.NewA11y() e.ConformsTo = []manifest.A11yProfile{manifest.EPUBA11y11WCAG21AA, manifest.EPUBA11y11WCAG20AAA, manifest.EPUBA11y10WCAG20A} @@ -361,7 +362,7 @@ func TestMetadataEPUB3Accessibility(t *testing.T) { } func TestMetadataEPUB3TDM(t *testing.T) { - m, err := loadMetadata("tdm-epub3") + m, err := loadMetadata(t.Context(), "tdm-epub3") assert.NoError(t, err) assert.Equal(t, &manifest.TDM{ Policy: "https://provider.com/policies/policy.json", @@ -370,9 +371,9 @@ func TestMetadataEPUB3TDM(t *testing.T) { } func TestMetadataTitleFileAs(t *testing.T) { - m2, err := loadMetadata("titles-epub2") + m2, err := loadMetadata(t.Context(), "titles-epub2") assert.NoError(t, err) - m3, err := 
loadMetadata("titles-epub3") + m3, err := loadMetadata(t.Context(), "titles-epub3") assert.NoError(t, err) assert.Equal(t, "Adventures", m2.SortAs()) @@ -381,7 +382,7 @@ func TestMetadataTitleFileAs(t *testing.T) { func TestMetadataTitleMainTakesPrecedence(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("title-main-precedence") + m3, err := loadMetadata(t.Context(), "title-main-precedence") assert.NoError(t, err) assert.Equal(t, "Main title takes precedence", m3.Title()) @@ -389,7 +390,7 @@ func TestMetadataTitleMainTakesPrecedence(t *testing.T) { func TestMetadataTitleSelectedSubtitleHasLowestDisplaySeqProperty(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("title-multiple-subtitles") + m3, err := loadMetadata(t.Context(), "title-multiple-subtitles") assert.NoError(t, err) assert.Equal(t, manifest.NewLocalizedStringFromStrings(map[string]string{ @@ -399,7 +400,7 @@ func TestMetadataTitleSelectedSubtitleHasLowestDisplaySeqProperty(t *testing.T) func TestMetadataSubjectLocalized(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("subjects-complex") + m3, err := loadMetadata(t.Context(), "subjects-complex") assert.NoError(t, err) assert.Len(t, m3.Subjects, 1) @@ -411,7 +412,7 @@ func TestMetadataSubjectLocalized(t *testing.T) { func TestMetadataSubjectFileAs(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("subjects-complex") + m3, err := loadMetadata(t.Context(), "subjects-complex") assert.NoError(t, err) assert.Len(t, m3.Subjects, 1) @@ -420,7 +421,7 @@ func TestMetadataSubjectFileAs(t *testing.T) { func TestMetadataSubjectCodeAndScheme(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("subjects-complex") + m3, err := loadMetadata(t.Context(), "subjects-complex") assert.NoError(t, err) assert.Len(t, m3.Subjects, 1) @@ -430,7 +431,7 @@ func TestMetadataSubjectCodeAndScheme(t *testing.T) { func TestMetadataSubjectCommaSeparatedSplit(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("subjects-single") + m3, err := 
loadMetadata(t.Context(), "subjects-single") assert.NoError(t, err) assert.Contains(t, m3.Subjects, manifest.Subject{LocalizedName: manifest.NewLocalizedStringFromString("apple")}) @@ -440,7 +441,7 @@ func TestMetadataSubjectCommaSeparatedSplit(t *testing.T) { func TestMetadataSubjectCommaSeparatedMultipleNotSplit(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("subjects-multiple") + m3, err := loadMetadata(t.Context(), "subjects-multiple") assert.NoError(t, err) assert.Contains(t, m3.Subjects, manifest.Subject{LocalizedName: manifest.NewLocalizedStringFromString("fiction")}) @@ -448,9 +449,9 @@ func TestMetadataSubjectCommaSeparatedMultipleNotSplit(t *testing.T) { } func TestMetadataDatePublished(t *testing.T) { - m2, err := loadMetadata("dates-epub2") + m2, err := loadMetadata(t.Context(), "dates-epub2") assert.NoError(t, err) - m3, err := loadMetadata("dates-epub3") + m3, err := loadMetadata(t.Context(), "dates-epub3") assert.NoError(t, err) tx, err := time.Parse(time.RFC3339, "1865-07-04T00:00:00Z") @@ -460,15 +461,15 @@ func TestMetadataDatePublished(t *testing.T) { assert.Equal(t, &tx, m3.Published) // Non-ISO date - m3notiso, err := loadMetadata("dates-epub3-notiso") + m3notiso, err := loadMetadata(t.Context(), "dates-epub3-notiso") assert.NoError(t, err) assert.Equal(t, time.Date(1865, time.January, 1, 0, 0, 0, 0, time.UTC), *m3notiso.Published) } func TestMetadataDateModified(t *testing.T) { - m2, err := loadMetadata("dates-epub2") + m2, err := loadMetadata(t.Context(), "dates-epub2") assert.NoError(t, err) - m3, err := loadMetadata("dates-epub3") + m3, err := loadMetadata(t.Context(), "dates-epub3") assert.NoError(t, err) tx, err := time.Parse(time.RFC3339, "2012-04-02T12:47:00Z") @@ -478,15 +479,15 @@ func TestMetadataDateModified(t *testing.T) { assert.Equal(t, &tx, m3.Modified) // Non-ISO date - m3notiso, err := loadMetadata("dates-epub3-notiso") + m3notiso, err := loadMetadata(t.Context(), "dates-epub3-notiso") assert.NoError(t, err) 
assert.Equal(t, time.Date(2012, time.April, 1, 0, 0, 0, 0, time.UTC), *m3notiso.Modified) } func TestMetadataConformsToProfileEPUB(t *testing.T) { - m2, err := loadMetadata("contributors-epub2") + m2, err := loadMetadata(t.Context(), "contributors-epub2") assert.NoError(t, err) - m3, err := loadMetadata("contributors-epub3") + m3, err := loadMetadata(t.Context(), "contributors-epub3") assert.NoError(t, err) assert.Contains(t, m2.ConformsTo, manifest.ProfileEPUB) @@ -494,14 +495,14 @@ func TestMetadataConformsToProfileEPUB(t *testing.T) { } func TestMetadataUniqueIdentifierParsed(t *testing.T) { - m3, err := loadMetadata("identifier-unique") + m3, err := loadMetadata(t.Context(), "identifier-unique") assert.NoError(t, err) assert.Equal(t, "urn:uuid:2", m3.Identifier) } func TestMetadataRenditionProperties(t *testing.T) { - m3, err := loadMetadata("presentation-metadata") + m3, err := loadMetadata(t.Context(), "presentation-metadata") assert.NoError(t, err) if assert.NotNil(t, m3.Presentation) { assert.Equal(t, false, *m3.Presentation.Continuous) @@ -514,11 +515,11 @@ func TestMetadataRenditionProperties(t *testing.T) { func TestMetadataCoverLink(t *testing.T) { // Note: not using loadMetadata - m2, err := loadPackageDoc("cover-epub2") + m2, err := loadPackageDoc(t.Context(), "cover-epub2") assert.NoError(t, err) - m3, err := loadPackageDoc("cover-epub3") + m3, err := loadPackageDoc(t.Context(), "cover-epub3") assert.NoError(t, err) - mm, err := loadPackageDoc("cover-mix") + mm, err := loadPackageDoc(t.Context(), "cover-mix") assert.NoError(t, err) expected := &manifest.Link{ @@ -532,12 +533,12 @@ func TestMetadataCoverLink(t *testing.T) { } func TestMetadataCrossRefinings(t *testing.T) { - _, err := loadPackageDoc("meta-termination") + _, err := loadPackageDoc(t.Context(), "meta-termination") assert.NoError(t, err) } func TestMetadataOtherMetadata(t *testing.T) { - m3, err := loadMetadata("meta-others") + m3, err := loadMetadata(t.Context(), "meta-others") 
assert.NoError(t, err) assert.Equal(t, m3.OtherMetadata, map[string]interface{}{ @@ -561,7 +562,7 @@ func TestMetadataOtherMetadata(t *testing.T) { func TestMetadataCollectionBasic(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("collections-epub3") + m3, err := loadMetadata(t.Context(), "collections-epub3") assert.NoError(t, err) assert.Contains(t, m3.BelongsToCollections(), manifest.Collection{ @@ -573,7 +574,7 @@ func TestMetadataCollectionBasic(t *testing.T) { func TestMetadataCollectionsWithUnknownTypeInBelongsTo(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("collections-epub3") + m3, err := loadMetadata(t.Context(), "collections-epub3") assert.NoError(t, err) assert.Contains(t, m3.BelongsToCollections(), manifest.Collection{ @@ -585,7 +586,7 @@ func TestMetadataCollectionsWithUnknownTypeInBelongsTo(t *testing.T) { func TestMetadataCollectionLocalizedSeries(t *testing.T) { // EPUB 3 only - m3, err := loadMetadata("collections-epub3") + m3, err := loadMetadata(t.Context(), "collections-epub3") assert.NoError(t, err) assert.Contains(t, m3.BelongsToSeries(), manifest.Collection{ @@ -599,9 +600,9 @@ func TestMetadataCollectionLocalizedSeries(t *testing.T) { } func TestMetadataCollectionSeriesWithPosition(t *testing.T) { - m2, err := loadMetadata("collections-epub2") + m2, err := loadMetadata(t.Context(), "collections-epub2") assert.NoError(t, err) - m3, err := loadMetadata("collections-epub3") + m3, err := loadMetadata(t.Context(), "collections-epub3") assert.NoError(t, err) expected := manifest.Collection{ diff --git a/pkg/parser/epub/parser.go b/pkg/parser/epub/parser.go index 4dab06fd..5ee7b93a 100644 --- a/pkg/parser/epub/parser.go +++ b/pkg/parser/epub/parser.go @@ -1,6 +1,8 @@ package epub import ( + "context" + "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/asset" "github.com/readium/go-toolkit/pkg/content/iterator" @@ -25,21 +27,21 @@ func NewParser(strategy ReflowableStrategy) Parser { } // Parse implements 
PublicationParser -func (p Parser) Parse(asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Builder, error) { +func (p Parser) Parse(ctx context.Context, asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Builder, error) { fallbackTitle := asset.Name() - if !asset.MediaType().Equal(&mediatype.EPUB) { + if !asset.MediaType(ctx).Equal(&mediatype.EPUB) { return nil, nil } - opfPath, err := GetRootFilePath(f) + opfPath, err := GetRootFilePath(ctx, f) if err != nil { return nil, err } // Detect DRM - opfXmlDocument, errx := f.Get(manifest.Link{Href: manifest.NewHREF(opfPath)}).ReadAsXML(map[string]string{ + opfXmlDocument, errx := fetcher.ReadResourceAsXML(ctx, f.Get(ctx, manifest.Link{Href: manifest.NewHREF(opfPath)}), map[string]string{ NamespaceOPF: "opf", NamespaceDC: "dc", VocabularyDCTerms: "dcterms", @@ -57,9 +59,9 @@ func (p Parser) Parse(asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Bui manifest := PublicationFactory{ FallbackTitle: fallbackTitle, PackageDocument: *packageDocument, - NavigationData: parseNavigationData(*packageDocument, f), - EncryptionData: parseEncryptionData(f), - DisplayOptions: parseDisplayOptions(f), + NavigationData: parseNavigationData(ctx, *packageDocument, f), + EncryptionData: parseEncryptionData(ctx, f), + DisplayOptions: parseDisplayOptions(ctx, f), }.Create() ffetcher := f @@ -77,8 +79,8 @@ func (p Parser) Parse(asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Bui return pub.NewBuilder(manifest, ffetcher, builder), nil } -func parseEncryptionData(fetcher fetcher.Fetcher) (ret map[url.URL]manifest.Encryption) { - n, err := fetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/encryption.xml", false)}).ReadAsXML(map[string]string{ +func parseEncryptionData(ctx context.Context, f fetcher.Fetcher) (ret map[url.URL]manifest.Encryption) { + n, err := fetcher.ReadResourceAsXML(ctx, f.Get(ctx, manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/encryption.xml", false)}), 
map[string]string{ NamespaceENC: "enc", NamespaceSIG: "ds", NamespaceCOMP: "comp", @@ -89,7 +91,7 @@ func parseEncryptionData(fetcher fetcher.Fetcher) (ret map[url.URL]manifest.Encr return ParseEncryption(n) } -func parseNavigationData(packageDocument PackageDocument, fetcher fetcher.Fetcher) (ret map[string]manifest.LinkList) { +func parseNavigationData(ctx context.Context, packageDocument PackageDocument, f fetcher.Fetcher) (ret map[string]manifest.LinkList) { ret = make(map[string]manifest.LinkList) if packageDocument.EPUBVersion < 3.0 { var ncxItem *Item @@ -111,7 +113,7 @@ func parseNavigationData(packageDocument PackageDocument, fetcher fetcher.Fetche if ncxItem == nil { return } - n, nerr := fetcher.Get(manifest.Link{Href: manifest.NewHREF(ncxItem.Href)}).ReadAsXML(map[string]string{ + n, nerr := fetcher.ReadResourceAsXML(ctx, f.Get(ctx, manifest.Link{Href: manifest.NewHREF(ncxItem.Href)}), map[string]string{ NamespaceNCX: "ncx", }) if nerr != nil { @@ -134,7 +136,7 @@ func parseNavigationData(packageDocument PackageDocument, fetcher fetcher.Fetche if navItem == nil { return } - n, errx := fetcher.Get(manifest.Link{Href: manifest.NewHREF(navItem.Href)}).ReadAsXML(map[string]string{ + n, errx := fetcher.ReadResourceAsXML(ctx, f.Get(ctx, manifest.Link{Href: manifest.NewHREF(navItem.Href)}), map[string]string{ NamespaceXHTML: "html", NamespaceOPS: "epub", }) @@ -146,11 +148,11 @@ func parseNavigationData(packageDocument PackageDocument, fetcher fetcher.Fetche return } -func parseDisplayOptions(fetcher fetcher.Fetcher) (ret map[string]string) { +func parseDisplayOptions(ctx context.Context, f fetcher.Fetcher) (ret map[string]string) { ret = make(map[string]string) - displayOptionsXml, err := fetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/com.apple.ibooks.display-options.xml", false)}).ReadAsXML(nil) + displayOptionsXml, err := fetcher.ReadResourceAsXML(ctx, f.Get(ctx, manifest.Link{Href: 
manifest.MustNewHREFFromString("META-INF/com.apple.ibooks.display-options.xml", false)}), nil) if err != nil { - displayOptionsXml, err = fetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/com.kobobooks.display-options.xml", false)}).ReadAsXML(nil) + displayOptionsXml, err = fetcher.ReadResourceAsXML(ctx, f.Get(ctx, manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/com.kobobooks.display-options.xml", false)}), nil) if err != nil { return } diff --git a/pkg/parser/epub/parser_encryption.go b/pkg/parser/epub/parser_encryption.go index 5fd18d86..6587cbeb 100644 --- a/pkg/parser/epub/parser_encryption.go +++ b/pkg/parser/epub/parser_encryption.go @@ -3,8 +3,8 @@ package epub import ( "strconv" - "github.com/readium/go-toolkit/pkg/drm" "github.com/readium/go-toolkit/pkg/manifest" + "github.com/readium/go-toolkit/pkg/protection" "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) @@ -45,7 +45,7 @@ func parseEncryptedData(node *xmlquery.Node) (url.URL, *manifest.Encryption) { } if retrievalMethod == "license.lcpl#/encryption/content_key" { - ret.Scheme = drm.SchemeLCP + ret.Scheme = protection.SchemeLCP } if encryptionmethod := node.SelectElement(NSSelect(NamespaceENC, "EncryptionMethod")); encryptionmethod != nil { diff --git a/pkg/parser/epub/parser_encryption_test.go b/pkg/parser/epub/parser_encryption_test.go index dc893230..ec664194 100644 --- a/pkg/parser/epub/parser_encryption_test.go +++ b/pkg/parser/epub/parser_encryption_test.go @@ -1,6 +1,7 @@ package epub import ( + "context" "testing" "github.com/readium/go-toolkit/pkg/fetcher" @@ -9,8 +10,8 @@ import ( "github.com/stretchr/testify/assert" ) -func loadEncryption(name string) (map[string]manifest.Encryption, error) { - n, rerr := fetcher.NewFileResource(manifest.Link{}, "./testdata/encryption/encryption-"+name+".xml").ReadAsXML(map[string]string{ +func loadEncryption(ctx context.Context, name string) (map[string]manifest.Encryption, error) { + n, rerr 
:= fetcher.ReadResourceAsXML(ctx, fetcher.NewFileResource(manifest.Link{}, "./testdata/encryption/encryption-"+name+".xml"), map[string]string{ NamespaceENC: "enc", NamespaceSIG: "ds", NamespaceCOMP: "comp", @@ -44,19 +45,19 @@ var testEncMap = map[string]manifest.Encryption{ } func TestEncryptionParserNamespacePrefixes(t *testing.T) { - e, err := loadEncryption("lcp-prefixes") + e, err := loadEncryption(t.Context(), "lcp-prefixes") assert.NoError(t, err) assert.Equal(t, testEncMap, e) } func TestEncryptionParserDefaultNamespaces(t *testing.T) { - e, err := loadEncryption("lcp-xmlns") + e, err := loadEncryption(t.Context(), "lcp-xmlns") assert.NoError(t, err) assert.Equal(t, testEncMap, e) } func TestEncryptionParserUnknownRetrievalMethod(t *testing.T) { - e, err := loadEncryption("unknown-method") + e, err := loadEncryption(t.Context(), "unknown-method") assert.NoError(t, err) assert.Equal(t, map[string]manifest.Encryption{ url.MustURLFromString("OEBPS/images/image.jpeg").String(): { diff --git a/pkg/parser/epub/parser_navdoc_test.go b/pkg/parser/epub/parser_navdoc_test.go index a38fe7be..7230471f 100644 --- a/pkg/parser/epub/parser_navdoc_test.go +++ b/pkg/parser/epub/parser_navdoc_test.go @@ -1,6 +1,7 @@ package epub import ( + "context" "testing" "github.com/readium/go-toolkit/pkg/fetcher" @@ -9,8 +10,8 @@ import ( "github.com/stretchr/testify/assert" ) -func loadNavDoc(name string) (map[string]manifest.LinkList, error) { - n, rerr := fetcher.NewFileResource(manifest.Link{}, "./testdata/navdoc/"+name+".xhtml").ReadAsXML(map[string]string{ +func loadNavDoc(ctx context.Context, name string) (map[string]manifest.LinkList, error) { + n, rerr := fetcher.ReadResourceAsXML(ctx, fetcher.NewFileResource(manifest.Link{}, "./testdata/navdoc/"+name+".xhtml"), map[string]string{ NamespaceXHTML: "html", NamespaceOPS: "epub", }) @@ -22,7 +23,7 @@ func loadNavDoc(name string) (map[string]manifest.LinkList, error) { } func TestNavDocParserNondirectDescendantOfBody(t *testing.T) 
{ - n, err := loadNavDoc("nav-section") + n, err := loadNavDoc(t.Context(), "nav-section") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ { @@ -33,7 +34,7 @@ func TestNavDocParserNondirectDescendantOfBody(t *testing.T) { } func TestNavDocParserNewlinesTrimmedFromTitle(t *testing.T) { - n, err := loadNavDoc("nav-titles") + n, err := loadNavDoc(t.Context(), "nav-titles") assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with new lines splitting the text", @@ -42,7 +43,7 @@ func TestNavDocParserNewlinesTrimmedFromTitle(t *testing.T) { } func TestNavDocParserSpacesTrimmedFromTitle(t *testing.T) { - n, err := loadNavDoc("nav-titles") + n, err := loadNavDoc(t.Context(), "nav-titles") assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with ignorable spaces", @@ -51,7 +52,7 @@ func TestNavDocParserSpacesTrimmedFromTitle(t *testing.T) { } func TestNavDocParserNestestHTMLElementsAllowedInTitle(t *testing.T) { - n, err := loadNavDoc("nav-titles") + n, err := loadNavDoc(t.Context(), "nav-titles") assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with nested HTML elements", @@ -60,7 +61,7 @@ func TestNavDocParserNestestHTMLElementsAllowedInTitle(t *testing.T) { } func TestNavDocParserEntryWithoutTitleOrChildrenIgnored(t *testing.T) { - n, err := loadNavDoc("nav-titles") + n, err := loadNavDoc(t.Context(), "nav-titles") assert.NoError(t, err) assert.NotContains(t, n["toc"], manifest.Link{ Title: "", @@ -69,7 +70,7 @@ func TestNavDocParserEntryWithoutTitleOrChildrenIgnored(t *testing.T) { } func TestNavDocParserEntryWithoutLinkOrChildrenIgnored(t *testing.T) { - n, err := loadNavDoc("nav-titles") + n, err := loadNavDoc(t.Context(), "nav-titles") assert.NoError(t, err) assert.NotContains(t, n["toc"], manifest.Link{ Title: "An unlinked element without children must be ignored", @@ -78,7 +79,7 @@ func TestNavDocParserEntryWithoutLinkOrChildrenIgnored(t *testing.T) { } func 
TestNavDocParserHierarchicalItemsNotAllowed(t *testing.T) { - n, err := loadNavDoc("nav-children") + n, err := loadNavDoc(t.Context(), "nav-children") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ {Title: "Introduction", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/introduction.xhtml", false)}, @@ -102,13 +103,13 @@ func TestNavDocParserHierarchicalItemsNotAllowed(t *testing.T) { } func TestNavDocParserEmptyDocAccepted(t *testing.T) { - n, err := loadNavDoc("nav-empty") + n, err := loadNavDoc(t.Context(), "nav-empty") assert.NoError(t, err) assert.Empty(t, n["toc"]) } func TestNavDocParserTOC(t *testing.T) { - n, err := loadNavDoc("nav-complex") + n, err := loadNavDoc(t.Context(), "nav-complex") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ {Title: "Chapter 1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml", false)}, @@ -117,7 +118,7 @@ func TestNavDocParserTOC(t *testing.T) { } func TestNavDocParserPageList(t *testing.T) { - n, err := loadNavDoc("nav-complex") + n, err := loadNavDoc(t.Context(), "nav-complex") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ {Title: "1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml#page1", false)}, diff --git a/pkg/parser/epub/parser_ncx_test.go b/pkg/parser/epub/parser_ncx_test.go index 3d669866..9518dc7e 100644 --- a/pkg/parser/epub/parser_ncx_test.go +++ b/pkg/parser/epub/parser_ncx_test.go @@ -1,6 +1,7 @@ package epub import ( + "context" "testing" "github.com/readium/go-toolkit/pkg/fetcher" @@ -9,8 +10,8 @@ import ( "github.com/stretchr/testify/assert" ) -func loadNcx(name string) (map[string]manifest.LinkList, error) { - n, rerr := fetcher.NewFileResource(manifest.Link{}, "./testdata/ncx/"+name+".ncx").ReadAsXML(map[string]string{ +func loadNcx(ctx context.Context, name string) (map[string]manifest.LinkList, error) { + n, rerr := fetcher.ReadResourceAsXML(ctx, fetcher.NewFileResource(manifest.Link{}, "./testdata/ncx/"+name+".ncx"), 
map[string]string{ NamespaceNCX: "ncx", }) if rerr != nil { @@ -21,7 +22,7 @@ func loadNcx(name string) (map[string]manifest.LinkList, error) { } func TestNCXParserNewlinesTrimmedFromTitle(t *testing.T) { - n, err := loadNcx("ncx-titles") + n, err := loadNcx(t.Context(), "ncx-titles") assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with new lines splitting the text", @@ -30,7 +31,7 @@ func TestNCXParserNewlinesTrimmedFromTitle(t *testing.T) { } func TestNCXParserSpacesTrimmedFromTitle(t *testing.T) { - n, err := loadNcx("ncx-titles") + n, err := loadNcx(t.Context(), "ncx-titles") assert.NoError(t, err) assert.Contains(t, n["toc"], manifest.Link{ Title: "A link with ignorable spaces", @@ -39,7 +40,7 @@ func TestNCXParserSpacesTrimmedFromTitle(t *testing.T) { } func TestNCXParserEntryWithNoTitleOrChildrenIgnored(t *testing.T) { - n, err := loadNcx("ncx-titles") + n, err := loadNcx(t.Context(), "ncx-titles") assert.NoError(t, err) assert.NotContains(t, n["toc"], manifest.Link{ Title: "", @@ -48,7 +49,7 @@ func TestNCXParserEntryWithNoTitleOrChildrenIgnored(t *testing.T) { } func TestNCXParserUnlinkedEntriesWithoutChildrenIgnored(t *testing.T) { - n, err := loadNcx("ncx-titles") + n, err := loadNcx(t.Context(), "ncx-titles") assert.NoError(t, err) assert.NotContains(t, n["toc"], manifest.Link{ Title: "An unlinked element without children must be ignored", @@ -57,7 +58,7 @@ func TestNCXParserUnlinkedEntriesWithoutChildrenIgnored(t *testing.T) { } func TestNCXParserHierarchicalItemsAllowed(t *testing.T) { - n, err := loadNcx("ncx-children") + n, err := loadNcx(t.Context(), "ncx-children") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ {Title: "Introduction", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/introduction.xhtml", false)}, @@ -81,13 +82,13 @@ func TestNCXParserHierarchicalItemsAllowed(t *testing.T) { } func TestNCXParserEmptyNCX(t *testing.T) { - n, err := loadNcx("ncx-empty") + n, err := loadNcx(t.Context(), 
"ncx-empty") assert.NoError(t, err) assert.Nil(t, n["toc"]) } func TestNCXParserTOC(t *testing.T) { - n, err := loadNcx("ncx-complex") + n, err := loadNcx(t.Context(), "ncx-complex") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ {Title: "Chapter 1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml", false)}, @@ -96,7 +97,7 @@ func TestNCXParserTOC(t *testing.T) { } func TestNCXParserPageList(t *testing.T) { - n, err := loadNcx("ncx-complex") + n, err := loadNcx(t.Context(), "ncx-complex") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ {Title: "1", Href: manifest.MustNewHREFFromString("OEBPS/xhtml/chapter1.xhtml#page1", false)}, diff --git a/pkg/parser/epub/parser_packagedoc_test.go b/pkg/parser/epub/parser_packagedoc_test.go index 8a233554..b3e96135 100644 --- a/pkg/parser/epub/parser_packagedoc_test.go +++ b/pkg/parser/epub/parser_packagedoc_test.go @@ -1,6 +1,7 @@ package epub import ( + "context" "testing" "github.com/readium/go-toolkit/pkg/fetcher" @@ -10,8 +11,8 @@ import ( "github.com/stretchr/testify/assert" ) -func loadPackageDoc(name string) (*manifest.Manifest, error) { - n, rerr := fetcher.NewFileResource(manifest.Link{}, "./testdata/package/"+name+".opf").ReadAsXML(map[string]string{ +func loadPackageDoc(ctx context.Context, name string) (*manifest.Manifest, error) { + n, rerr := fetcher.ReadResourceAsXML(ctx, fetcher.NewFileResource(manifest.Link{}, "./testdata/package/"+name+".opf"), map[string]string{ NamespaceOPF: "opf", NamespaceDC: "dc", VocabularyDCTerms: "dcterms", @@ -35,31 +36,31 @@ func loadPackageDoc(name string) (*manifest.Manifest, error) { } func TestPackageDocReadingProgressionNoneIsAuto(t *testing.T) { - p, err := loadPackageDoc("progression-none") + p, err := loadPackageDoc(t.Context(), "progression-none") assert.NoError(t, err) assert.Equal(t, manifest.Auto, p.Metadata.ReadingProgression) } func TestPackageDocPageProgression(t *testing.T) { - p, err := loadPackageDoc("progression-default") + p, 
err := loadPackageDoc(t.Context(), "progression-default") assert.NoError(t, err) assert.Equal(t, manifest.Auto, p.Metadata.ReadingProgression) } func TestPackageDocPageProgressionLTR(t *testing.T) { - p, err := loadPackageDoc("progression-ltr") + p, err := loadPackageDoc(t.Context(), "progression-ltr") assert.NoError(t, err) assert.Equal(t, manifest.LTR, p.Metadata.ReadingProgression) } func TestPackageDocPageProgressionRTL(t *testing.T) { - p, err := loadPackageDoc("progression-rtl") + p, err := loadPackageDoc(t.Context(), "progression-rtl") assert.NoError(t, err) assert.Equal(t, manifest.RTL, p.Metadata.ReadingProgression) } func TestPackageDocLinkPropertiesContains(t *testing.T) { - p, err := loadPackageDoc("links-properties") + p, err := loadPackageDoc(t.Context(), "links-properties") assert.NoError(t, err) ro := p.ReadingOrder assert.Equal(t, []string{"mathml"}, ro[0].Properties.Contains()) @@ -70,7 +71,7 @@ func TestPackageDocLinkPropertiesContains(t *testing.T) { } func TestPackageDocLinkPropertiesRels(t *testing.T) { - p, err := loadPackageDoc("links-properties") + p, err := loadPackageDoc(t.Context(), "links-properties") assert.NoError(t, err) ro := p.ReadingOrder assert.Equal(t, manifest.Strings{"cover"}, p.Resources[0].Rels) @@ -82,7 +83,7 @@ func TestPackageDocLinkPropertiesRels(t *testing.T) { } func TestPackageDocLinkPropertiesPresentation(t *testing.T) { - p, err := loadPackageDoc("links-properties") + p, err := loadPackageDoc(t.Context(), "links-properties") assert.NoError(t, err) ro := p.ReadingOrder assert.Equal(t, ro[0].Properties.Layout(), manifest.EPUBLayoutFixed) @@ -111,7 +112,7 @@ func TestPackageDocLinkPropertiesPresentation(t *testing.T) { } func TestPackageDocLinkReadingOrder(t *testing.T) { - p, err := loadPackageDoc("links") + p, err := loadPackageDoc(t.Context(), "links") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{ @@ -127,7 +128,7 @@ func TestPackageDocLinkReadingOrder(t *testing.T) { } func TestPackageDocLinkResources(t 
*testing.T) { - p, err := loadPackageDoc("links") + p, err := loadPackageDoc(t.Context(), "links") assert.NoError(t, err) ft := mediatype.OfString("application/vnd.ms-opentype") @@ -175,7 +176,7 @@ func TestPackageDocLinkResources(t *testing.T) { } /*func TestPackageDocLinkFallbacksMappedToAlternates(t *testing.T) { - p, err := loadPackageDoc("fallbacks") + p, err := loadPackageDoc(t.Context(), "fallbacks") assert.NoError(t, err) assert.Equal(t, manifest.LinkList{}, p.Resources) @@ -183,7 +184,7 @@ func TestPackageDocLinkResources(t *testing.T) { }*/ func TestPackageDocLinkFallbacksCircularDependencies(t *testing.T) { - _, err := loadPackageDoc("fallbacks-termination") + _, err := loadPackageDoc(t.Context(), "fallbacks-termination") assert.NoError(t, err) // t.Logf("%+v\n", p) } diff --git a/pkg/parser/epub/parser_smil_test.go b/pkg/parser/epub/parser_smil_test.go index ab9b9b33..dcd1ca69 100644 --- a/pkg/parser/epub/parser_smil_test.go +++ b/pkg/parser/epub/parser_smil_test.go @@ -1,6 +1,7 @@ package epub import ( + "context" "testing" "github.com/readium/go-toolkit/pkg/fetcher" @@ -9,8 +10,8 @@ import ( "github.com/stretchr/testify/assert" ) -func loadSmil(name string) (*manifest.GuidedNavigationDocument, error) { - n, rerr := fetcher.NewFileResource(manifest.Link{}, "./testdata/smil/"+name+".smil").ReadAsXML(map[string]string{ +func loadSmil(ctx context.Context, name string) (*manifest.GuidedNavigationDocument, error) { + n, rerr := fetcher.ReadResourceAsXML(ctx, fetcher.NewFileResource(manifest.Link{}, "./testdata/smil/"+name+".smil"), map[string]string{ NamespaceOPS: "epub", NamespaceSMIL: "smil", NamespaceSMIL2: "smil2", @@ -23,7 +24,7 @@ func loadSmil(name string) (*manifest.GuidedNavigationDocument, error) { } func TestSMILDocTypicalAudio(t *testing.T) { - doc, err := loadSmil("audio1") + doc, err := loadSmil(t.Context(), "audio1") if !assert.NoError(t, err) { return } @@ -37,13 +38,13 @@ func TestSMILDocTypicalAudio(t *testing.T) { func 
TestSMILW3Examples(t *testing.T) { // Examples from the EPUB Media Overlay spec from W3 for _, v := range []string{"w3-2", "w3-3", "w3-4", "w3-8", "w3-10"} { - _, err := loadSmil(v) + _, err := loadSmil(t.Context(), v) assert.NoError(t, err) } } func TestSMILClipBoundaries(t *testing.T) { - doc, err := loadSmil("audio-clip") + doc, err := loadSmil(t.Context(), "audio-clip") if !assert.NoError(t, err) { return } diff --git a/pkg/parser/epub/positions_service.go b/pkg/parser/epub/positions_service.go index 48bf17e8..497592bc 100644 --- a/pkg/parser/epub/positions_service.go +++ b/pkg/parser/epub/positions_service.go @@ -1,6 +1,7 @@ package epub import ( + "context" "math" "github.com/readium/go-toolkit/pkg/fetcher" @@ -31,13 +32,13 @@ func (s *PositionsService) Links() manifest.LinkList { return manifest.LinkList{pub.PositionsLink} } -func (s *PositionsService) Get(link manifest.Link) (fetcher.Resource, bool) { - return pub.GetForPositionsService(s, link) +func (s *PositionsService) Get(ctx context.Context, link manifest.Link) (fetcher.Resource, bool) { + return pub.GetForPositionsService(ctx, s, link) } // Positions implements pub.PositionsService -func (s *PositionsService) Positions() []manifest.Locator { - poss := s.PositionsByReadingOrder() +func (s *PositionsService) Positions(ctx context.Context) []manifest.Locator { + poss := s.PositionsByReadingOrder(ctx) positions := make([]manifest.Locator, 0, len(poss)) // At least 1 link per RO element for _, v := range poss { positions = append(positions, v...) 
@@ -46,14 +47,14 @@ func (s *PositionsService) Positions() []manifest.Locator { } // PositionsByReadingOrder implements PositionsService -func (s *PositionsService) PositionsByReadingOrder() [][]manifest.Locator { +func (s *PositionsService) PositionsByReadingOrder(ctx context.Context) [][]manifest.Locator { if len(s.positions) == 0 { - s.positions = s.computePositions() + s.positions = s.computePositions(ctx) } return s.positions } -func (s *PositionsService) computePositions() [][]manifest.Locator { +func (s *PositionsService) computePositions(ctx context.Context) [][]manifest.Locator { var lastPositionOfPreviousResource uint positions := make([][]manifest.Locator, len(s.readingOrder)) for i, link := range s.readingOrder { @@ -61,7 +62,7 @@ func (s *PositionsService) computePositions() [][]manifest.Locator { if s.presentation.LayoutOf(link) == manifest.EPUBLayoutFixed { lpositions = s.createFixed(link, lastPositionOfPreviousResource) } else { - lpositions = s.createReflowable(link, lastPositionOfPreviousResource, s.fetcher) + lpositions = s.createReflowable(ctx, link, lastPositionOfPreviousResource, s.fetcher) } if len(lpositions) > 0 { pos := lpositions[len(lpositions)-1].Locations.Position @@ -93,8 +94,8 @@ func (s *PositionsService) createFixed(link manifest.Link, startPosition uint) [ return []manifest.Locator{s.createLocator(link, 0, startPosition+1)} } -func (s *PositionsService) createReflowable(link manifest.Link, startPosition uint, fetcher fetcher.Fetcher) []manifest.Locator { - resource := fetcher.Get(link) +func (s *PositionsService) createReflowable(ctx context.Context, link manifest.Link, startPosition uint, fetcher fetcher.Fetcher) []manifest.Locator { + resource := fetcher.Get(ctx, link) defer resource.Close() positionCount := s.reflowableStrategy.PositionCount(resource) @@ -154,13 +155,13 @@ type OriginalLength struct { } // PositionCount implements ReflowableStrategy -func (l OriginalLength) PositionCount(resource fetcher.Resource) uint { +func 
(l OriginalLength) PositionCount(ctx context.Context, resource fetcher.Resource) uint { var length int64 lnk := resource.Link() if enc := lnk.Properties.Encryption(); enc != nil { length = enc.OriginalLength } else { - length, _ = resource.Length() + length, _ = resource.Length(ctx) } return uint(math.Min(math.Ceil(float64(length)/float64(l.PageLength)), 1)) diff --git a/pkg/parser/epub/positions_service_test.go b/pkg/parser/epub/positions_service_test.go index 88c9092b..a4414cc9 100644 --- a/pkg/parser/epub/positions_service_test.go +++ b/pkg/parser/epub/positions_service_test.go @@ -8,7 +8,7 @@ import ( func TestEPUBPositionsServiceEmptyReadingOrder(t *testing.T) { service := PositionsService{} - assert.Equal(t, 0, len(service.Positions())) + assert.Equal(t, 0, len(service.Positions(t.Context()))) } // TODO replicate `createService` tester from Kotlin diff --git a/pkg/parser/epub/utils.go b/pkg/parser/epub/utils.go index 09e9b182..9d6b453c 100644 --- a/pkg/parser/epub/utils.go +++ b/pkg/parser/epub/utils.go @@ -1,19 +1,22 @@ package epub import ( + "context" "strconv" "strings" "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/fetcher" + ftchr "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/util/url" "github.com/readium/xmlquery" ) -func GetRootFilePath(fetcher fetcher.Fetcher) (url.URL, error) { - res := fetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/container.xml", false)}) - xml, err := res.ReadAsXML(map[string]string{ +func GetRootFilePath(ctx context.Context, fetcher fetcher.Fetcher) (url.URL, error) { + res := fetcher.Get(ctx, manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/container.xml", false)}) + + xml, err := ftchr.ReadResourceAsXML(ctx, res, map[string]string{ "urn:oasis:names:tc:opendocument:xmlns:container": "cn", }) if err != nil { diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go index 399df28f..0e566619 100644 --- 
a/pkg/parser/parser.go +++ b/pkg/parser/parser.go @@ -1,11 +1,13 @@ package parser import ( + "context" + "github.com/readium/go-toolkit/pkg/asset" "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/pub" ) type PublicationParser interface { - Parse(asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Builder, error) + Parse(ctx context.Context, asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Builder, error) } diff --git a/pkg/parser/parser_audio.go b/pkg/parser/parser_audio.go index d8809a28..0b4baf6c 100644 --- a/pkg/parser/parser_audio.go +++ b/pkg/parser/parser_audio.go @@ -1,6 +1,7 @@ package parser import ( + "context" "errors" "path/filepath" "sort" @@ -19,12 +20,12 @@ import ( type AudioParser struct{} // Parse implements PublicationParser -func (p AudioParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher) (*pub.Builder, error) { - if !p.accepts(asset, fetcher) { +func (p AudioParser) Parse(ctx context.Context, asset asset.PublicationAsset, fetcher fetcher.Fetcher) (*pub.Builder, error) { + if !p.accepts(ctx, asset, fetcher) { return nil, nil } - links, err := fetcher.Links() + links, err := fetcher.Links(ctx) if err != nil { return nil, err } @@ -54,7 +55,7 @@ func (p AudioParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher }) // Try to figure out the publication's title - title := guessPublicationTitleFromFileStructure(fetcher) + title := guessPublicationTitleFromFileStructure(ctx, fetcher) if title == "" { title = asset.Name() } @@ -80,11 +81,11 @@ var allowed_extensions_audio = map[string]struct{}{ "ogg": {}, "oga": {}, "mogg": {}, "opus": {}, "wav": {}, "webm": {}, } -func (p AudioParser) accepts(asset asset.PublicationAsset, fetcher fetcher.Fetcher) bool { - if asset.MediaType().Equal(&mediatype.ZAB) { +func (p AudioParser) accepts(ctx context.Context, asset asset.PublicationAsset, fetcher fetcher.Fetcher) bool { + if asset.MediaType(ctx).Equal(&mediatype.ZAB) { return true } - links, 
err := fetcher.Links() + links, err := fetcher.Links(ctx) if err != nil { // TODO log return false diff --git a/pkg/parser/parser_image.go b/pkg/parser/parser_image.go index 2dd2f271..5433c14b 100644 --- a/pkg/parser/parser_image.go +++ b/pkg/parser/parser_image.go @@ -1,6 +1,7 @@ package parser import ( + "context" "errors" "path/filepath" "sort" @@ -19,12 +20,12 @@ import ( type ImageParser struct{} // Parse implements PublicationParser -func (p ImageParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher) (*pub.Builder, error) { - if ok, err := p.accepts(asset, fetcher); err != nil || !ok { +func (p ImageParser) Parse(ctx context.Context, asset asset.PublicationAsset, fetcher fetcher.Fetcher) (*pub.Builder, error) { + if ok, err := p.accepts(ctx, asset, fetcher); err != nil || !ok { return nil, err } - links, err := fetcher.Links() + links, err := fetcher.Links(ctx) if err != nil { return nil, err } @@ -49,7 +50,7 @@ func (p ImageParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher }) // Try to figure out the publication's title - title := guessPublicationTitleFromFileStructure(fetcher) + title := guessPublicationTitleFromFileStructure(ctx, fetcher) if title == "" { title = asset.Name() } @@ -74,11 +75,11 @@ func (p ImageParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher var allowed_extensions_image = map[string]struct{}{"acbf": {}, "xml": {}, "txt": {}, "json": {}} -func (p ImageParser) accepts(asset asset.PublicationAsset, fetcher fetcher.Fetcher) (bool, error) { - if asset.MediaType().Equal(&mediatype.CBZ) || asset.MediaType().Equal(&mediatype.CBR) { +func (p ImageParser) accepts(ctx context.Context, asset asset.PublicationAsset, fetcher fetcher.Fetcher) (bool, error) { + if asset.MediaType(ctx).Equal(&mediatype.CBZ) || asset.MediaType(ctx).Equal(&mediatype.CBR) { return true, nil } - links, err := fetcher.Links() + links, err := fetcher.Links(ctx) if err != nil { return false, err } diff --git 
a/pkg/parser/parser_image_test.go b/pkg/parser/parser_image_test.go index 66f81c6b..77bbed2e 100644 --- a/pkg/parser/parser_image_test.go +++ b/pkg/parser/parser_image_test.go @@ -12,12 +12,13 @@ import ( ) func withImageParser(t *testing.T, filepath string, f func(*pub.Builder)) { - a := asset.File(filepath) - fet, err := a.CreateFetcher(asset.Dependencies{ + u, _ := url.FromFilepath(filepath) + a := asset.File(u) + fet, err := a.CreateFetcher(t.Context(), asset.Dependencies{ ArchiveFactory: archive.NewArchiveFactory(), }, "") assert.NoError(t, err) - p, err := ImageParser{}.Parse(a, fet) + p, err := ImageParser{}.Parse(t.Context(), a, fet) assert.NoError(t, err) f(p) } diff --git a/pkg/parser/parser_readium_webpub.go b/pkg/parser/parser_readium_webpub.go index bd1cb673..878b3b5f 100644 --- a/pkg/parser/parser_readium_webpub.go +++ b/pkg/parser/parser_readium_webpub.go @@ -1,11 +1,12 @@ package parser import ( + "context" "net/http" "github.com/pkg/errors" "github.com/readium/go-toolkit/pkg/asset" - "github.com/readium/go-toolkit/pkg/fetcher" + ftchr "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" "github.com/readium/go-toolkit/pkg/mediatype" "github.com/readium/go-toolkit/pkg/pub" @@ -23,9 +24,9 @@ func NewWebPubParser(client *http.Client) WebPubParser { } // Parse implements PublicationParser -func (p WebPubParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetcher) (*pub.Builder, error) { +func (p WebPubParser) Parse(ctx context.Context, asset asset.PublicationAsset, fetcher ftchr.Fetcher) (*pub.Builder, error) { lFetcher := fetcher - mediaType := asset.MediaType() + mediaType := asset.MediaType(ctx) if !isMediatypeReadiumWebPubProfile(mediaType) { return nil, nil @@ -35,22 +36,22 @@ func (p WebPubParser) Parse(asset asset.PublicationAsset, fetcher fetcher.Fetche var manifestJSON map[string]interface{} if isPackage { - res := lFetcher.Get(manifest.Link{Href: manifest.MustNewHREFFromString("manifest.json", 
false)}) - mjr, err := res.ReadAsJSON() + res := lFetcher.Get(ctx, manifest.Link{Href: manifest.MustNewHREFFromString("manifest.json", false)}) + mjr, err := ftchr.ReadResourceAsJSON(ctx, res) if err != nil { return nil, err } manifestJSON = mjr } else { // For a single manifest file, reads the first (and only) file in the fetcher. - links, err := lFetcher.Links() + links, err := lFetcher.Links(ctx) if err != nil { return nil, err } if len(links) == 0 { return nil, errors.New("links is empty") } - mj, rerr := lFetcher.Get(links[0]).ReadAsJSON() + mj, rerr := ftchr.ReadResourceAsJSON(ctx, lFetcher.Get(ctx, links[0])) if rerr != nil { return nil, rerr.Cause } diff --git a/pkg/parser/pdf/parser.go b/pkg/parser/pdf/parser.go index ec8f8759..ed2b98f7 100644 --- a/pkg/parser/pdf/parser.go +++ b/pkg/parser/pdf/parser.go @@ -1,6 +1,8 @@ package pdf import ( + "context" + "github.com/pdfcpu/pdfcpu/pkg/pdfcpu" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/validate" @@ -25,14 +27,14 @@ func init() { } // Parse implements PublicationParser -func (p Parser) Parse(asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Builder, error) { +func (p Parser) Parse(ctx context.Context, asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Builder, error) { fallbackTitle := asset.Name() - if !asset.MediaType().Equal(&mediatype.PDF) { + if !asset.MediaType(ctx).Equal(&mediatype.PDF) { return nil, nil } - links, err := f.Links() + links, err := f.Links(ctx) if err != nil { return nil, errors.Wrap(err, "unable to fetch links") } @@ -46,17 +48,17 @@ func (p Parser) Parse(asset asset.PublicationAsset, f fetcher.Fetcher) (*pub.Bui conf := model.NewDefaultConfiguration() conf.ValidationMode = model.ValidationRelaxed - ctx, err := pdfcpu.Read(fetcher.NewResourceReadSeeker(f.Get(*link)), conf) + c, err := pdfcpu.Read(fetcher.NewResourceReadSeeker(f.Get(ctx, *link)), conf) if err != nil { return nil, errors.Wrap(err, "failed opening PDF") } // Clean up and 
prepare document - validate.XRefTable(ctx) - pdfcpu.OptimizeXRefTable(ctx) - ctx.EnsurePageCount() + validate.XRefTable(c) + pdfcpu.OptimizeXRefTable(c) + c.EnsurePageCount() - m, err := ParseMetadata(ctx, link) + m, err := ParseMetadata(c, link) if err != nil { return nil, errors.Wrap(err, "failed parsing PDF metadata") } diff --git a/pkg/parser/pdf/positions_service.go b/pkg/parser/pdf/positions_service.go index 4b2d7ac0..cd9b1ed5 100644 --- a/pkg/parser/pdf/positions_service.go +++ b/pkg/parser/pdf/positions_service.go @@ -1,6 +1,7 @@ package pdf import ( + "context" "fmt" "github.com/readium/go-toolkit/pkg/fetcher" @@ -24,13 +25,13 @@ func (s *PositionsService) Links() manifest.LinkList { return manifest.LinkList{pub.PositionsLink} } -func (s *PositionsService) Get(link manifest.Link) (fetcher.Resource, bool) { - return pub.GetForPositionsService(s, link) +func (s *PositionsService) Get(ctx context.Context, link manifest.Link) (fetcher.Resource, bool) { + return pub.GetForPositionsService(ctx, s, link) } // Positions implements pub.PositionsService -func (s *PositionsService) Positions() []manifest.Locator { - poss := s.PositionsByReadingOrder() +func (s *PositionsService) Positions(ctx context.Context) []manifest.Locator { + poss := s.PositionsByReadingOrder(ctx) var positions []manifest.Locator for _, v := range poss { positions = append(positions, v...) 
@@ -39,7 +40,7 @@ func (s *PositionsService) Positions() []manifest.Locator { } // PositionsByReadingOrder implements PositionsService -func (s *PositionsService) PositionsByReadingOrder() [][]manifest.Locator { +func (s *PositionsService) PositionsByReadingOrder(ctx context.Context) [][]manifest.Locator { if len(s.positions) == 0 { s.positions = s.computePositions() } diff --git a/pkg/parser/utils.go b/pkg/parser/utils.go index 7a0b360b..6b6f1bfb 100644 --- a/pkg/parser/utils.go +++ b/pkg/parser/utils.go @@ -1,6 +1,7 @@ package parser import ( + "context" "strings" "github.com/readium/go-toolkit/pkg/fetcher" @@ -23,8 +24,8 @@ func hrefCommonFirstComponent(links manifest.LinkList) string { return latest } -func guessPublicationTitleFromFileStructure(fetcher fetcher.Fetcher) string { // TODO test for this - links, err := fetcher.Links() +func guessPublicationTitleFromFileStructure(ctx context.Context, fetcher fetcher.Fetcher) string { // TODO test for this + links, err := fetcher.Links(ctx) if err != nil || len(links) == 0 { return "" } diff --git a/pkg/drm/drm.go b/pkg/protection/drm.go similarity index 63% rename from pkg/drm/drm.go rename to pkg/protection/drm.go index 1b57b6f5..8e7f1157 100644 --- a/pkg/drm/drm.go +++ b/pkg/protection/drm.go @@ -1,8 +1,8 @@ -package drm +package protection -// TODO DRM class const ( - SchemeLCP = "http://readium.org/2014/01/lcp" + SchemeLCP = "http://readium.org/2014/01/lcp" + SchemeAdept = "http://ns.adobe.com/adept" ) // TODO replace with ContentProtection API diff --git a/pkg/pub/publication.go b/pkg/pub/publication.go index c6bec3c4..fa67568d 100644 --- a/pkg/pub/publication.go +++ b/pkg/pub/publication.go @@ -1,6 +1,7 @@ package pub import ( + "context" "encoding/json" "github.com/readium/go-toolkit/pkg/fetcher" @@ -53,9 +54,9 @@ func (p Publication) JSONManifest() (string, error) { return string(bin), nil } -func (p Publication) PositionsFromManifest() []manifest.Locator { +func (p Publication) PositionsFromManifest(ctx 
context.Context) []manifest.Locator { // TODO just access the service directly and don't marshal and unmarshal JSON? - data, err := p.Get(PositionsLink).ReadAsJSON() + data, err := fetcher.ReadResourceAsJSON(ctx, p.Get(ctx, PositionsLink)) if err != nil || data == nil { return []manifest.Locator{} } @@ -64,6 +65,9 @@ func (p Publication) PositionsFromManifest() []manifest.Locator { return []manifest.Locator{} } positions, ok := rawPositions.([]map[string]interface{}) + if !ok { + return []manifest.Locator{} + } locators := make([]manifest.Locator, len(positions)) for i, rl := range positions { locator, _ := manifest.LocatorFromJSON(rl) @@ -72,20 +76,20 @@ func (p Publication) PositionsFromManifest() []manifest.Locator { return locators } -func (p Publication) PositionsByReadingOrder() [][]manifest.Locator { +func (p Publication) PositionsByReadingOrder(ctx context.Context) [][]manifest.Locator { service := p.FindService(PositionsService_Name) if service == nil { return nil } - return service.(PositionsService).PositionsByReadingOrder() + return service.(PositionsService).PositionsByReadingOrder(ctx) } -func (p *Publication) Positions() []manifest.Locator { +func (p *Publication) Positions(ctx context.Context) []manifest.Locator { service := p.FindService(PositionsService_Name) if service == nil { return nil } - return service.(PositionsService).Positions() + return service.(PositionsService).Positions(ctx) } // The URL where this publication is served, computed from the [Link] with `self` relation. @@ -122,13 +126,13 @@ func (p Publication) FindServices(serviceName string) []Service { } // Returns the resource targeted by the given non-templated [link]. 
-func (p Publication) Get(link manifest.Link) fetcher.Resource { +func (p Publication) Get(ctx context.Context, link manifest.Link) fetcher.Resource { for _, service := range p.services { - if l, ok := service.Get(link); ok { + if l, ok := service.Get(ctx, link); ok { return l } } - return p.Fetcher.Get(link) + return p.Fetcher.Get(ctx, link) } // Free up resources associated with the publication diff --git a/pkg/pub/service.go b/pkg/pub/service.go index db5e8954..ac6901a1 100644 --- a/pkg/pub/service.go +++ b/pkg/pub/service.go @@ -1,6 +1,8 @@ package pub import ( + "context" + "github.com/readium/go-toolkit/pkg/fetcher" "github.com/readium/go-toolkit/pkg/manifest" ) @@ -17,9 +19,9 @@ const ( // Base interface to be implemented by all publication services. type Service interface { - Links() manifest.LinkList // Links to be added to the publication - Get(link manifest.Link) (fetcher.Resource, bool) // A service can return a Resource that supplements, replaces or compensates for other links - Close() // Closes any opened file handles, removes temporary files, etc. + Links() manifest.LinkList // Links to be added to the publication + Get(ctx context.Context, link manifest.Link) (fetcher.Resource, bool) // A service can return a Resource that supplements, replaces or compensates for other links + Close() // Closes any opened file handles, removes temporary files, etc. } // Container for the context from which a service is created. 
diff --git a/pkg/pub/service_content.go b/pkg/pub/service_content.go index 672e731c..6e881909 100644 --- a/pkg/pub/service_content.go +++ b/pkg/pub/service_content.go @@ -1,6 +1,7 @@ package pub import ( + "context" "encoding/json" "github.com/readium/go-toolkit/pkg/content" @@ -34,12 +35,12 @@ type DefaultContentService struct { resourceContentIteratorFactories []iterator.ResourceContentIteratorFactory } -func GetForContentService(service ContentService, link manifest.Link) (fetcher.Resource, bool) { +func GetForContentService(ctx context.Context, service ContentService, link manifest.Link) (fetcher.Resource, bool) { if link.Href != ContentLink.Href { return nil, false } - elements, err := content.ContentElements(service.Content(nil)) + elements, err := content.ContentElements(ctx, service.Content(nil)) if err != nil { return fetcher.NewFailureResource(ContentLink, fetcher.Other(err)), false } @@ -57,8 +58,8 @@ func (s DefaultContentService) Links() manifest.LinkList { return manifest.LinkList{ContentLink} } -func (s DefaultContentService) Get(link manifest.Link) (fetcher.Resource, bool) { - return GetForContentService(s, link) +func (s DefaultContentService) Get(ctx context.Context, link manifest.Link) (fetcher.Resource, bool) { + return GetForContentService(ctx, s, link) } func (s DefaultContentService) Content(start *manifest.Locator) content.Content { @@ -84,12 +85,12 @@ func (c contentImplementation) Iterator() iterator.Iterator { ) } -func (c contentImplementation) Elements() ([]element.Element, error) { - return content.ContentElements(c) +func (c contentImplementation) Elements(ctx context.Context) ([]element.Element, error) { + return content.ContentElements(ctx, c) } -func (c contentImplementation) Text(separator *string) (string, error) { - return content.ContentText(c, separator) +func (c contentImplementation) Text(ctx context.Context, separator *string) (string, error) { + return content.ContentText(ctx, c, separator) } func 
DefaultContentServiceFactory(resourceContentIteratorFactories []iterator.ResourceContentIteratorFactory) ServiceFactory { diff --git a/pkg/pub/service_guided_navigation.go b/pkg/pub/service_guided_navigation.go index bf879364..980d38d6 100644 --- a/pkg/pub/service_guided_navigation.go +++ b/pkg/pub/service_guided_navigation.go @@ -1,6 +1,7 @@ package pub import ( + "context" "encoding/json" "github.com/pkg/errors" @@ -26,11 +27,11 @@ func init() { // Provides a way to access guided navigation documents for resources of a [Publication]. type GuidedNavigationService interface { Service - GuideForResource(href string) (*manifest.GuidedNavigationDocument, error) + GuideForResource(ctx context.Context, href string) (*manifest.GuidedNavigationDocument, error) HasGuideForResource(href string) bool } -func GetForGuidedNavigationService(service GuidedNavigationService, link manifest.Link) (fetcher.Resource, bool) { +func GetForGuidedNavigationService(ctx context.Context, service GuidedNavigationService, link manifest.Link) (fetcher.Resource, bool) { u := link.URL(nil, nil) if u.Path() != resolvedGuidedNavigation.Path() { @@ -62,7 +63,7 @@ func GetForGuidedNavigationService(service GuidedNavigationService, link manifes } return fetcher.NewBytesResource(link, func() []byte { - doc, err := service.GuideForResource(ref) + doc, err := service.GuideForResource(ctx, ref) if err != nil { // TODO: handle error somehow return nil diff --git a/pkg/pub/service_positions.go b/pkg/pub/service_positions.go index 8e06f510..1bb924c8 100644 --- a/pkg/pub/service_positions.go +++ b/pkg/pub/service_positions.go @@ -1,6 +1,7 @@ package pub import ( + "context" "encoding/json" "github.com/readium/go-toolkit/pkg/fetcher" @@ -18,8 +19,8 @@ var PositionsLink = manifest.Link{ // Provides a list of discrete locations in the publication, no matter what the original format is. 
type PositionsService interface { Service - PositionsByReadingOrder() [][]manifest.Locator // Returns the list of all the positions in the publication, grouped by the resource reading order index. - Positions() []manifest.Locator // Returns the list of all the positions in the publication. (flattening of PositionsByReadingOrder) + PositionsByReadingOrder(ctx context.Context) [][]manifest.Locator // Returns the list of all the positions in the publication, grouped by the resource reading order index. + Positions(ctx context.Context) []manifest.Locator // Returns the list of all the positions in the publication. (flattening of PositionsByReadingOrder) } // PerResourcePositionsService implements PositionsService @@ -29,13 +30,13 @@ type PerResourcePositionsService struct { fallbackMediaType mediatype.MediaType } -func GetForPositionsService(service PositionsService, link manifest.Link) (fetcher.Resource, bool) { +func GetForPositionsService(ctx context.Context, service PositionsService, link manifest.Link) (fetcher.Resource, bool) { if !link.URL(nil, nil).Equivalent(PositionsLink.URL(nil, nil)) { return nil, false } return fetcher.NewBytesResource(PositionsLink, func() []byte { - positions := service.Positions() + positions := service.Positions(ctx) bin, _ := json.Marshal(map[string]interface{}{ "total": len(positions), "positions": positions, @@ -50,12 +51,12 @@ func (s PerResourcePositionsService) Links() manifest.LinkList { return manifest.LinkList{PositionsLink} } -func (s PerResourcePositionsService) Get(link manifest.Link) (fetcher.Resource, bool) { - return GetForPositionsService(s, link) +func (s PerResourcePositionsService) Get(ctx context.Context, link manifest.Link) (fetcher.Resource, bool) { + return GetForPositionsService(ctx, s, link) } -func (s PerResourcePositionsService) Positions() []manifest.Locator { - poss := s.PositionsByReadingOrder() +func (s PerResourcePositionsService) Positions(ctx context.Context) []manifest.Locator { + poss := 
s.PositionsByReadingOrder(ctx) positions := make([]manifest.Locator, len(poss)) for i, v := range poss { positions[i] = v[0] // Always just one element @@ -63,7 +64,7 @@ func (s PerResourcePositionsService) Positions() []manifest.Locator { return positions } -func (s PerResourcePositionsService) PositionsByReadingOrder() [][]manifest.Locator { +func (s PerResourcePositionsService) PositionsByReadingOrder(ctx context.Context) [][]manifest.Locator { positions := make([][]manifest.Locator, len(s.readingOrder)) pageCount := len(s.readingOrder) for i, v := range s.readingOrder { diff --git a/pkg/pub/service_positions_test.go b/pkg/pub/service_positions_test.go index 0b3813a3..f2313f8c 100644 --- a/pkg/pub/service_positions_test.go +++ b/pkg/pub/service_positions_test.go @@ -12,7 +12,7 @@ import ( func TestPerResourcePositionsServiceEmptyReadingOrder(t *testing.T) { service := PerResourcePositionsService{} - assert.Equal(t, 0, len(service.Positions())) + assert.Equal(t, 0, len(service.Positions(t.Context()))) } func TestPerResourcePositionsServiceSingleReadingOrder(t *testing.T) { @@ -27,7 +27,7 @@ func TestPerResourcePositionsServiceSingleReadingOrder(t *testing.T) { Position: extensions.Pointer(uint(1)), TotalProgression: extensions.Pointer(float64(0.0)), }, - }}, service.Positions()) + }}, service.Positions(t.Context())) } func TestPerResourcePositionsServiceMultiReadingOrder(t *testing.T) { @@ -66,7 +66,7 @@ func TestPerResourcePositionsServiceMultiReadingOrder(t *testing.T) { TotalProgression: extensions.Pointer(float64(2.0 / 3.0)), }, }, - }, service.Positions()) + }, service.Positions(t.Context())) } func TestPerResourcePositionsServiceMediaTypeFallback(t *testing.T) { @@ -83,5 +83,5 @@ func TestPerResourcePositionsServiceMediaTypeFallback(t *testing.T) { Position: extensions.Pointer(uint(1)), TotalProgression: extensions.Pointer(float64(0.0)), }, - }}, service.Positions()) + }}, service.Positions(t.Context())) } diff --git a/pkg/streamer/streamer.go 
b/pkg/streamer/streamer.go index 40198c8c..f12a7af1 100644 --- a/pkg/streamer/streamer.go +++ b/pkg/streamer/streamer.go @@ -1,6 +1,7 @@ package streamer import ( + "context" "net/http" "github.com/pkg/errors" @@ -54,7 +55,7 @@ const ( func New(config Config) Streamer { // TODO contentProtections if config.HttpClient == nil { - config.HttpClient = http.DefaultClient + config.HttpClient = http.DefaultClient // TODO: better default HTTP client } if config.ArchiveFactory == nil { config.ArchiveFactory = archive.NewArchiveFactory() @@ -82,8 +83,8 @@ func New(config Config) Streamer { // TODO contentProtections } // Parses a [Publication] from the given asset. -func (s Streamer) Open(a asset.PublicationAsset, credentials string) (*pub.Publication, error) { - fetcher, err := a.CreateFetcher(asset.Dependencies{ +func (s Streamer) Open(ctx context.Context, a asset.PublicationAsset, credentials string) (*pub.Publication, error) { + fetcher, err := a.CreateFetcher(ctx, asset.Dependencies{ ArchiveFactory: s.archiveFactory, }, credentials) if err != nil { @@ -94,7 +95,7 @@ func (s Streamer) Open(a asset.PublicationAsset, credentials string) (*pub.Publi var builder *pub.Builder for _, parser := range s.parsers { - pb, err := parser.Parse(a, fetcher) + pb, err := parser.Parse(ctx, a, fetcher) if err != nil { fetcher.Close() return nil, errors.Wrap(err, "failed parsing asset") @@ -116,7 +117,7 @@ func (s Streamer) Open(a asset.PublicationAsset, credentials string) (*pub.Publi s.inferA11yMetadataInPublication(pub) if s.inferPageCount && pub.Manifest.Metadata.NumberOfPages == nil { - pageCount := uint(len(pub.Positions())) + pageCount := uint(len(pub.Positions(ctx))) if pageCount > 0 { pub.Manifest.Metadata.NumberOfPages = &pageCount } diff --git a/pkg/util/url/scheme.go b/pkg/util/url/scheme.go index a1377553..bc6261dc 100644 --- a/pkg/util/url/scheme.go +++ b/pkg/util/url/scheme.go @@ -15,24 +15,14 @@ const ( SchemeFile Scheme = "file" ) +var ( + BaseFile, _ = 
AbsoluteURLFromString("file:///") +) + func SchemeFromString(s string) Scheme { s = strings.ToLower(s) switch s { - case "http": - fallthrough - case "https": - fallthrough - case "data": - fallthrough - case "ftp": - fallthrough - case "s3": - fallthrough - case "gs": - fallthrough - case "opds": - fallthrough - case "file": + case "http", "https", "data", "ftp", "s3", "gs", "opds", "file": return Scheme(s) default: // Not a known scheme. diff --git a/pkg/util/url/url.go b/pkg/util/url/url.go index cda4c22c..208aedbc 100644 --- a/pkg/util/url/url.go +++ b/pkg/util/url/url.go @@ -355,6 +355,11 @@ func (u AbsoluteURL) IsFile() bool { return u.scheme.IsFile() } +// Indicates whether this URL points to a cloud resource. +func (u AbsoluteURL) IsCloud() bool { + return u.scheme.IsCloud() +} + // Converts the URL to a filepath, if it's a file URL. func (u AbsoluteURL) ToFilepath() string { if !u.IsFile() { diff --git a/pkg/util/url/url_remote.go b/pkg/util/url/url_remote.go new file mode 100644 index 00000000..4f2cc4fe --- /dev/null +++ b/pkg/util/url/url_remote.go @@ -0,0 +1,37 @@ +package url + +import ( + "errors" + "path" + "strings" + + "cloud.google.com/go/storage" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +// Turns an absolute URL into an S3 object +// We could theoretically accept https S3 URLs like s3.amazonaws.com, +// but the potential endpoints are way too complex, and this would also +// exclude third-party services that have S3 compatibility.
Instead, +// the user of the toolkit should turn their data into an s3 URI, meaning +// the structure s3://{bucket}/{key} +func (u AbsoluteURL) ToS3Object() (*s3.GetObjectInput, error) { + if u.scheme != "s3" { + return nil, errors.New("not an s3 url") + } + + path := strings.TrimPrefix(path.Clean(u.Path()), "/") + return &s3.GetObjectInput{ + Bucket: &u.url.Host, + Key: &path, + }, nil +} + +func (u AbsoluteURL) ToGSObject(client *storage.Client) (*storage.ObjectHandle, error) { + if u.scheme != "gs" { + return nil, errors.New("not a gs url") + } + + path := strings.TrimPrefix(path.Clean(u.Path()), "/") + return client.Bucket(u.url.Host).Object(path), nil +}