Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions cmd/rwp/cmd/helpers/inference.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package helpers

import (
"errors"

"github.com/readium/go-toolkit/pkg/streamer"
)

// InferA11yMetadata is a defined type based on streamer.InferA11yMetadata.
// It carries the String, Set and Type methods so that a value of this type
// can be registered directly as a command-line flag (see the Flags().Var
// calls for "infer-a11y").
type InferA11yMetadata streamer.InferA11yMetadata

// String returns the textual form of the inference mode: "merged", "split",
// or "no" for every other value, including a nil receiver.
// It is used both by fmt.Print and by Cobra in help text.
func (e *InferA11yMetadata) String() string {
	if e != nil {
		if *e == InferA11yMetadata(streamer.InferA11yMetadataMerged) {
			return "merged"
		}
		if *e == InferA11yMetadata(streamer.InferA11yMetadataSplit) {
			return "split"
		}
	}
	// Nil receiver, the explicit "no" mode, and any unknown value all land here.
	return "no"
}

// Set parses v and stores the matching inference mode in the receiver.
// Accepted values are exactly "no", "merged" and "split"; anything else
// leaves the receiver untouched and returns an error.
func (e *InferA11yMetadata) Set(v string) error {
	var parsed InferA11yMetadata
	switch v {
	case "no":
		parsed = InferA11yMetadata(streamer.InferA11yMetadataNo)
	case "merged":
		parsed = InferA11yMetadata(streamer.InferA11yMetadataMerged)
	case "split":
		parsed = InferA11yMetadata(streamer.InferA11yMetadataSplit)
	default:
		return errors.New(`must be one of "no", "merged", or "split"`)
	}
	// Only write through the pointer once the input is known to be valid.
	*e = parsed
	return nil
}

// Type reports the flag's value kind as "string".
// It is only used in help text.
func (*InferA11yMetadata) Type() string {
	return "string"
}
51 changes: 51 additions & 0 deletions cmd/rwp/cmd/helpers/inspector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package helpers

import (
"io/fs"

"github.com/pkg/errors"
"github.com/readium/go-toolkit/pkg/analyzer"
"github.com/readium/go-toolkit/pkg/manifest"
)

// ImageInspector is a manifest transformer that runs analyzer.Image on
// bitmap links and substitutes the link that call returns. Only
// TransformLink does any work; the other Transform* methods return their
// input unchanged.
type ImageInspector struct {
// Filesystem is handed to analyzer.Image together with each inspected link.
Filesystem fs.FS
// Algorithms is the list of hash algorithms handed to analyzer.Image.
Algorithms []manifest.HashAlgorithm
// err records the first inspection failure; it is exposed through Error.
err error
}

// Error returns the failure recorded by TransformLink, or nil if no
// inspection has failed so far. Callers check it after running the
// inspector over a list of links.
func (n *ImageInspector) Error() error {
return n.err
}

// TransformHREF implements ManifestTransformer.
// The inspector does not rewrite HREFs, so href is handed back as-is.
func (*ImageInspector) TransformHREF(href manifest.HREF) manifest.HREF {
	return href
}

// TransformLink implements ManifestTransformer.
//
// Links with a bitmap media type are passed to analyzer.Image and replaced by
// the link it returns. Every other link is returned unchanged. If an
// inspection fails, the wrapped error is stored on the inspector (see Error),
// the original link is returned, and all later calls become no-ops.
func (n *ImageInspector) TransformLink(link manifest.Link) manifest.Link {
	// Guards are checked in the same order as a short-circuit || chain.
	switch {
	case n.err != nil:
		// A previous inspection already failed; stop doing work.
		return link
	case link.MediaType == nil:
		return link
	case !link.MediaType.IsBitmap():
		return link
	}

	inspected, err := analyzer.Image(n.Filesystem, link, n.Algorithms)
	if err == nil {
		return *inspected
	}

	n.err = errors.Wrap(err, "failed inspecting image "+link.Href.String())
	return link
}

// TransformManifest implements ManifestTransformer.
// The inspector does not act at the manifest level, so the input is handed
// back as-is.
func (*ImageInspector) TransformManifest(manifest manifest.Manifest) manifest.Manifest {
	return manifest
}

// TransformMetadata implements ManifestTransformer.
// The inspector does not alter metadata, so the input is handed back as-is.
func (*ImageInspector) TransformMetadata(metadata manifest.Metadata) manifest.Metadata {
	return metadata
}
84 changes: 45 additions & 39 deletions cmd/rwp/cmd/manifest.go
Original file line number Diff line number Diff line change
@@ -1,25 +1,38 @@
package cmd

import (
"context"
"encoding/json"
"errors"
"fmt"
"path/filepath"

"github.com/pkg/errors"
"github.com/readium/go-toolkit/cmd/rwp/cmd/helpers"
"github.com/readium/go-toolkit/pkg/asset"
"github.com/readium/go-toolkit/pkg/fetcher"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/go-toolkit/pkg/streamer"
"github.com/readium/go-toolkit/pkg/util/url"
"github.com/spf13/cobra"
)

// Indentation used to pretty-print.
var indentFlag string

// Infer accessibility metadata.
var inferA11yFlag InferA11yMetadata
var inferA11yFlag helpers.InferA11yMetadata

// Infer the number of pages from the generated position list.
var inferPageCountFlag bool

/*var inferIgnoreImageHashesFlag []string

var inferIgnoreImageDirectoryFlag string*/
Comment thread
mickael-menu marked this conversation as resolved.

var hash []string

var inspectImagesFlag bool

var manifestCmd = &cobra.Command{
Use: "manifest <pub-path>",
Short: "Generate a Readium Web Publication Manifest for a publication",
Expand Down Expand Up @@ -53,17 +66,42 @@ Examples:
// occurs.
cmd.SilenceUsage = true

path := filepath.Clean(args[0])
path, err := url.FromFilepath(filepath.Clean(args[0]))
if err != nil {
return fmt.Errorf("failed creating URL from filepath: %w", err)
}
pub, err := streamer.New(streamer.Config{
InferA11yMetadata: streamer.InferA11yMetadata(inferA11yFlag),
InferPageCount: inferPageCountFlag,
}).Open(
context.TODO(),
asset.File(path), "",
)
if err != nil {
return fmt.Errorf("failed opening %s: %w", path, err)
}

if inspectImagesFlag {
hashAlgorithms := make([]manifest.HashAlgorithm, len(hash))
for i, h := range hash {
hashAlgorithms[i] = manifest.HashAlgorithm(h)
}
inspector := &helpers.ImageInspector{
Algorithms: hashAlgorithms,
Filesystem: fetcher.ToFS(context.TODO(), pub.Fetcher),
}

// Inspect publication files and overwrite the links
pub.Manifest.ReadingOrder = pub.Manifest.ReadingOrder.Copy(inspector)
if inspector.Error() != nil {
return fmt.Errorf("failed inspecting images in reading order: %w", inspector.Error())
}
pub.Manifest.Resources = pub.Manifest.Resources.Copy(inspector)
if inspector.Error() != nil {
return fmt.Errorf("failed inspecting images in resources: %w", inspector.Error())
}
}

var jsonBytes []byte
if indentFlag == "" {
jsonBytes, err = json.Marshal(pub.Manifest)
Expand All @@ -84,40 +122,8 @@ func init() {
manifestCmd.Flags().StringVarP(&indentFlag, "indent", "i", "", "Indentation used to pretty-print")
manifestCmd.Flags().Var(&inferA11yFlag, "infer-a11y", "Infer accessibility metadata: no, merged, split")
manifestCmd.Flags().BoolVar(&inferPageCountFlag, "infer-page-count", false, "Infer the number of pages from the generated position list.")
}

type InferA11yMetadata streamer.InferA11yMetadata

// String is used both by fmt.Print and by Cobra in help text
func (e *InferA11yMetadata) String() string {
if e == nil {
return "no"
}
switch *e {
case InferA11yMetadata(streamer.InferA11yMetadataMerged):
return "merged"
case InferA11yMetadata(streamer.InferA11yMetadataSplit):
return "split"
default:
return "no"
}
}

func (e *InferA11yMetadata) Set(v string) error {
switch v {
case "no":
*e = InferA11yMetadata(streamer.InferA11yMetadataNo)
case "merged":
*e = InferA11yMetadata(streamer.InferA11yMetadataMerged)
case "split":
*e = InferA11yMetadata(streamer.InferA11yMetadataSplit)
default:
return errors.New(`must be one of "no", "merged", or "split"`)
}
return nil
}

// Type is only used in help text.
func (e *InferA11yMetadata) Type() string {
return "string"
manifestCmd.Flags().StringSliceVar(&hash, "hash", []string{string(manifest.HashAlgorithmSHA256), string(manifest.HashAlgorithmMD5)}, "Hashes to use when enhancing links, such as with image inspection. Note visual hashes are more computationally expensive. Acceptable values: sha256,md5,phash-dct,https://blurha.sh")
manifestCmd.Flags().BoolVar(&inspectImagesFlag, "inspect-images", false, "Inspect images in the manifest. Their links will be enhanced with size, width and height, and hashes")
// manifestCmd.Flags().StringSliceVar(&inferIgnoreImageHashesFlag, "infer-a11y-ignore-image-hashes", nil, "Ignore the given hashes when inferring textual accessibility. Hashes are in the format <algorithm>:<base64 value>, separated by commas.")
// manifestCmd.Flags().StringVar(&inferIgnoreImageDirectoryFlag, "infer-a11y-ignore-image-dir", "", "Ignore the images in a given directory when inferring textual accessibility.")
}
87 changes: 86 additions & 1 deletion cmd/rwp/cmd/serve.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
package cmd

import (
"context"
"errors"
"fmt"
"log"
"net/http"
"os"
"path/filepath"
"time"

"log/slog"

"cloud.google.com/go/storage"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/readium/go-toolkit/cmd/rwp/cmd/serve"
"github.com/readium/go-toolkit/cmd/rwp/cmd/serve/client"
"github.com/readium/go-toolkit/pkg/streamer"
"github.com/spf13/cobra"
"google.golang.org/api/option"
)

var debugFlag bool
Expand All @@ -21,6 +30,19 @@ var bindAddressFlag string

var bindPortFlag uint16

// Cloud-related flags

// S3 settings, bound to --s3-endpoint, --s3-region, --s3-access-key and
// --s3-secret-key. The endpoint is optional and overrides the default S3
// base endpoint when set; static credentials are only used when both the
// access key and the secret key are non-empty.
var s3EndpointFlag string
var s3RegionFlag string
var s3AccessKeyFlag string
var s3SecretKeyFlag string

// Value of the HTTP authorization header used by the remote HTTP client,
// bound to --http-authorization (e.g. "Bearer <token>").
var httpAuthorizationFlag string

// Remote archive streaming settings:
//   - remoteArchiveTimeoutFlag: timeout for remote archive requests, in seconds.
//   - remoteArchiveCacheSize: max size, in bytes, of an archive item that can be cached.
//   - remoteArchiveCacheCount: max number of items in an archive that can be cached.
//   - remoteArchiveCacheAll: archives of this size or less, in bytes, are cached in full.
var remoteArchiveTimeoutFlag uint32
var remoteArchiveCacheSize uint32
var remoteArchiveCacheCount uint32
var remoteArchiveCacheAll uint32

var serveCmd = &cobra.Command{
Use: "serve <directory>",
Short: "Start a local HTTP server, serving a specified directory of publications",
Expand Down Expand Up @@ -74,12 +96,64 @@ to the internet except for testing/debugging purposes.`,
slog.SetLogLoggerLevel(slog.LevelInfo)
}

// Set up remote publication retrieval clients
remote := serve.Remote{}

// S3
options := []func(*config.LoadOptions) error{
config.WithRegion(s3RegionFlag),
config.WithRequestChecksumCalculation(0),
config.WithResponseChecksumValidation(0),
// TODO: look into custom HTTP client, user-agent
}
if s3AccessKeyFlag != "" && s3SecretKeyFlag != "" {
options = append(options, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(s3AccessKeyFlag, s3SecretKeyFlag, "")))
}
cfg, err := config.LoadDefaultConfig(context.Background(), options...)
if err != nil {
log.Fatal(err)
}
_, err = cfg.Credentials.Retrieve(context.Background())
if err == nil {
remote.S3 = s3.NewFromConfig(cfg, func(o *s3.Options) {
if s3EndpointFlag != "" {
o.BaseEndpoint = aws.String(s3EndpointFlag)
}
})
} else {
slog.Warn("S3 credentials retrieval failed, S3 support will be disabled", "error", err)
}

// GCS
opts := []option.ClientOption{
option.WithScopes(storage.ScopeReadOnly),
storage.WithJSONReads(),
// option.WithUserAgent(TODO),
// TODO: look into more efficient transport (HTTP client)
}
remote.GCS, err = storage.NewClient(context.Background(), opts...)
if err != nil {
slog.Warn("GCS client creation failed, GCS support will be disabled", "error", err)
}

remote.HTTP, err = client.NewHTTPClient(httpAuthorizationFlag)
if err != nil {
slog.Warn("HTTP client creation failed, HTTP support will be disabled", "error", err)
}

// Remote archive streaming tweaks
remote.Config.CacheCountThreshold = int64(remoteArchiveCacheCount)
remote.Config.CacheSizeThreshold = int64(remoteArchiveCacheSize)
remote.Config.Timeout = time.Duration(remoteArchiveTimeoutFlag) * time.Second
remote.Config.CacheAllThreshold = int64(remoteArchiveCacheAll)

// Create server
pubServer := serve.NewServer(serve.ServerConfig{
Debug: debugFlag,
BaseDirectory: path,
JSONIndent: indentFlag,
InferA11yMetadata: streamer.InferA11yMetadata(inferA11yFlag),
})
}, remote)

bind := fmt.Sprintf("%s:%d", bindAddressFlag, bindPortFlag)
httpServer := &http.Server{
Expand Down Expand Up @@ -109,4 +183,15 @@ func init() {
serveCmd.Flags().Var(&inferA11yFlag, "infer-a11y", "Infer accessibility metadata: no, merged, split")
serveCmd.Flags().BoolVarP(&debugFlag, "debug", "d", false, "Enable debug mode")

serveCmd.Flags().StringVar(&s3EndpointFlag, "s3-endpoint", "", "Custom S3 endpoint URL")
serveCmd.Flags().StringVar(&s3RegionFlag, "s3-region", "auto", "S3 region")
serveCmd.Flags().StringVar(&s3AccessKeyFlag, "s3-access-key", "", "S3 access key")
serveCmd.Flags().StringVar(&s3SecretKeyFlag, "s3-secret-key", "", "S3 secret key")

serveCmd.Flags().StringVar(&httpAuthorizationFlag, "http-authorization", "", "HTTP authorization header value (e.g. 'Bearer <token>' or 'Basic <base64-credentials>')")

serveCmd.Flags().Uint32Var(&remoteArchiveTimeoutFlag, "remote-archive-timeout", 60, "Timeout for remote archive requests (in seconds)")
serveCmd.Flags().Uint32Var(&remoteArchiveCacheSize, "remote-archive-cache-size", 1024*1024, "Max size of items in an archive that can be cached (in bytes)")
serveCmd.Flags().Uint32Var(&remoteArchiveCacheCount, "remote-archive-cache-count", 64, "Max number of items in an archive that can be cached")
serveCmd.Flags().Uint32Var(&remoteArchiveCacheAll, "remote-archive-cache-all", 1024*1024, "Archives this size or less (in bytes) will be cached in full")
}
Loading
Loading