diff --git a/README.md b/README.md index 35b1d6b..6b45af9 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ services: # - OPDS__PORT=5228 # - OPDS__FEEDS__0__NAME=Some Feed # - OPDS__FEEDS__0__URL=http://some-feed.com/opds + # - OPDS__FEEDS__0__USER_AGENT=OPDS-Proxy/1.0 # - OPDS__FEEDS__0__AUTH__USERNAME=user # - OPDS__FEEDS__0__AUTH__PASSWORD=password # - OPDS__FEEDS__0__AUTH__LOCAL_ONLY=true @@ -84,6 +85,10 @@ auth: feeds: - name: Some Feed url: http://some-feed.com/opds + # (Optional) Feed Custom User Agent + # If present, requests to this feed will use this custom User-Agent. + # This is useful for feeds that require a specific User-Agent to function. + user_agent: OPDS-Proxy/1.0 # (Optional) Feed Authentication Credentials # If present, users will not be prompted for credentials in the web interface. # The server will take care of sending these with requests to the feed URL. diff --git a/config.yml b/config.yml index e9cbebb..b515128 100644 --- a/config.yml +++ b/config.yml @@ -2,5 +2,6 @@ debug: true feeds: - name: "Project Gutenberg" url: "https://www.gutenberg.org/ebooks/search.opds/" + user_agent: "OPDS-Proxy/1.0" - name: "Anarchist Library" url: "https://theanarchistlibrary.org/opds" diff --git a/handlers/feed.go b/handlers/feed.go index 4124be8..cde08fc 100644 --- a/handlers/feed.go +++ b/handlers/feed.go @@ -60,11 +60,26 @@ func (h *FeedHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } + var customUserAgent string + if parsedReqURL, err := url.Parse(resolvedURL); err == nil { + for _, feed := range h.feeds { + if parsedFeedURL, err := url.Parse(feed.Url); err == nil { + if parsedFeedURL.Hostname() == parsedReqURL.Hostname() && feed.UserAgent != "" { + customUserAgent = feed.UserAgent + break + } + } + } + } + creds := auth.GetCredentials(resolvedURL, r, h.feeds, h.s) resp, err := httpx.Fetch(resolvedURL, 10, func(req *http.Request) { if creds != nil { req.SetBasicAuth(creds.Username, creds.Password) } + if customUserAgent != "" { + req.Header.Set("User-Agent", customUserAgent) + } }) if err != nil { http.Error(w, fmt.Sprintf("Failed to fetch %q: %v", resolvedURL, err), http.StatusBadGateway) @@ -126,14 +141,19 @@ func (h *FeedHandler) resolveQueryURL(queryURL, searchTerm string) (string, erro return repl.Replace(queryURL), nil } - if tmpl, err := opds.ResolveOpenSearchTemplate(queryURL); err == nil && tmpl != "" { - return repl.Replace(tmpl), nil + // Fall back to appending the search parameter for non-OpenSearch servers + u, err := url.Parse(queryURL) + if err == nil { + q := u.Query() + q.Set("query", searchTerm) + u.RawQuery = q.Encode() + return u.String(), nil } return queryURL, nil } -func (h *FeedHandler) serveAtom(w http.ResponseWriter, r *http.Request, resp *http.Response, url string, deviceType device.DeviceType) error { +func (h *FeedHandler) serveAtom(w http.ResponseWriter, r *http.Request, resp *http.Response, feedUrl string, deviceType device.DeviceType) error { // Read the body so we can fall back to forwarding it on parse/render errors body, err := io.ReadAll(resp.Body) if err != nil { @@ -150,6 +170,50 @@ func (h *FeedHandler) serveAtom(w http.ResponseWriter, r *http.Request, resp *ht return nil } + // Intercept OpenSearch descriptor links and resolve them to URL templates + for i, link := range feed.Links { + if link.Rel == "search" && link.TypeLink == "application/opensearchdescription+xml" { + base, err := url.Parse(feedUrl) + if err == nil { + if rel, err := url.Parse(link.Href); err == nil { + osdURL := base.ResolveReference(rel).String() + + var customUserAgent string + if parsedReqURL, err := url.Parse(osdURL); err == nil { + for _, feedCfg := range h.feeds { + if parsedFeedURL, err := url.Parse(feedCfg.Url); err == nil { + if parsedFeedURL.Hostname() == parsedReqURL.Hostname() && feedCfg.UserAgent != "" { + customUserAgent = feedCfg.UserAgent + break + } + } + } + } + + creds := auth.GetCredentials(osdURL, r, h.feeds, h.s) + osdResp, err := httpx.Fetch(osdURL, 10, func(req *http.Request) { + if creds != nil { + req.SetBasicAuth(creds.Username, creds.Password) + } + if customUserAgent != "" { + req.Header.Set("User-Agent", customUserAgent) + } + }) + + if err == nil && osdResp.StatusCode >= 200 && osdResp.StatusCode < 300 { + if tmpl, err := opds.ParseOpenSearchTemplate(osdResp.Body); err == nil && tmpl != "" { + feed.Links[i].Href = tmpl + feed.Links[i].TypeLink = "application/atom+xml" + } + osdResp.Body.Close() + } else if err == nil { + osdResp.Body.Close() + } + } + } + } + } + entryID := r.URL.Query().Get("id") if entryID != "" { var entry opds.Entry @@ -165,7 +229,7 @@ func (h *FeedHandler) serveAtom(w http.ResponseWriter, r *http.Request, resp *ht } params := view.EntryParams{ - URL: url, + URL: feedUrl, Feed: feed, Entry: entry, DeviceType: deviceType, @@ -176,7 +240,7 @@ func (h *FeedHandler) serveAtom(w http.ResponseWriter, r *http.Request, resp *ht return nil } - params := view.FeedParams{URL: url, Feed: feed} + params := view.FeedParams{URL: feedUrl, Feed: feed} view.Render(w, func(buf io.Writer) error { return view.Feed(buf, params) }) return nil } diff --git a/internal/auth/util.go b/internal/auth/util.go index 174af4b..4c23dbf 100644 --- a/internal/auth/util.go +++ b/internal/auth/util.go @@ -68,7 +68,8 @@ type FeedAuth struct { } type FeedConfig struct { - Name string - Url string - Auth *FeedAuth + Name string + Url string + UserAgent string + Auth *FeedAuth } diff --git a/internal/formats/formats.go b/internal/formats/formats.go index 1c9c150..9c879b6 100644 --- a/internal/formats/formats.go +++ b/internal/formats/formats.go @@ -75,6 +75,8 @@ func FormatByMimeType(mimeType string) (Format, bool) { // Legacy/alternative MIME types "application/mobi": MOBI, "application/x-epub+zip": EPUB, + "application/xml": ATOM, + "text/xml": ATOM, } format, exists := formats[mimeType] diff --git a/internal/formats/formats_test.go b/internal/formats/formats_test.go new file mode 100644 index 0000000..136a939 --- /dev/null +++ b/internal/formats/formats_test.go @@ -0,0 +1,58 @@ +package formats + +import ( + "testing" +) + +func TestFormatByMimeType(t *testing.T) { + tests := []struct { + name string + mimeType string + want Format + wantOk bool + }{ + { + name: "ATOM from standard application/atom+xml", + mimeType: "application/atom+xml", + want: ATOM, + wantOk: true, + }, + { + name: "ATOM from generic application/xml", + mimeType: "application/xml", + want: ATOM, + wantOk: true, + }, + { + name: "ATOM from generic text/xml", + mimeType: "text/xml", + want: ATOM, + wantOk: true, + }, + { + name: "EPUB from standard application/epub+zip", + mimeType: "application/epub+zip", + want: EPUB, + wantOk: true, + }, + { + name: "Unknown format", + mimeType: "application/unknown", + want: Format{}, + wantOk: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := FormatByMimeType(tt.mimeType) + if ok != tt.wantOk { + t.Errorf("FormatByMimeType() ok = %v, wantOk %v", ok, tt.wantOk) + return + } + if ok && got != tt.want { + t.Errorf("FormatByMimeType() got = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/main.go b/main.go index f933545..e5b80ff 100644 --- a/main.go +++ b/main.go @@ -37,9 +37,10 @@ type AuthConfig struct { } type FeedConfig struct { - Name string `koanf:"name"` - Url string `koanf:"url"` - Auth *FeedConfigAuth `koanf:"auth"` + Name string `koanf:"name"` + Url string `koanf:"url"` + UserAgent string `koanf:"user_agent"` + Auth *FeedConfigAuth `koanf:"auth"` } type FeedConfigAuth struct { diff --git a/opds/entry_test.go b/opds/entry_test.go index cc0bbc8..014eadf 100644 --- a/opds/entry_test.go +++ b/opds/entry_test.go @@ -48,7 +48,7 @@ func TestEntryUnmarshal(t *testing.T) { Formats: AZW3,EPUB

A timeless romance following Elizabeth Bennet, a strong-willed young woman, and Mr. Darcy, a proud and wealthy gentleman. Set in Georgian England, the novel explores themes of love, marriage, social class, and personal growth through wit and humor.

When Elizabeth first meets Mr. Darcy at a ball, she finds him arrogant and disagreeable. Meanwhile, she is charmed by the dashing Mr. Wickham, who tells her tales of Darcy's alleged misconduct. As the story unfolds, Elizabeth discovers that first impressions can be deceiving, and that pride and prejudice can blind us to true character.

Through a series of misunderstandings, revelations, and personal growth, both Elizabeth and Darcy must overcome their initial judgments to find true love. This beloved classic remains one of the most popular novels in English literature.

- + diff --git a/opds/link.go b/opds/link.go index 0eeea68..c802cdd 100644 --- a/opds/link.go +++ b/opds/link.go @@ -41,7 +41,7 @@ type Links []Link // IsDownload checks if the link is an acquisition/download link func (l Link) IsDownload() bool { - return l.Rel == AcquisitionFeedRel + return strings.HasPrefix(l.Rel, AcquisitionFeedRel) } // IsImage checks if the link is an image with optional category filtering diff --git a/opds/opensearch.go b/opds/opensearch.go index cecfbed..b2c5375 100644 --- a/opds/opensearch.go +++ b/opds/opensearch.go @@ -4,8 +4,6 @@ import ( "encoding/xml" "fmt" "io" - "net/http" - "time" ) // OpenSearchDescription represents an OpenSearch Description Document (OSDD) @@ -25,30 +23,9 @@ type OSDUrl struct { Template string `xml:"template,attr"` } -// ResolveOpenSearchTemplate fetches an OSDD from the given URL and returns the -// Atom/OPDS template URL to use for search requests. It prefers -// "application/atom+xml;profile=opds-catalog" then falls back to -// "application/atom+xml" if needed. -func ResolveOpenSearchTemplate(osdURL string) (string, error) { - client := &http.Client{Timeout: 10 * time.Second} - - // simplified request: use client.Get since no headers are needed - resp, err := client.Get(osdURL) - if err != nil { - return "", fmt.Errorf("failed to fetch OpenSearch description from %q: %w", osdURL, err) - } - defer resp.Body.Close() - - if resp.StatusCode < 200 || resp.StatusCode >= 300 { - return "", fmt.Errorf("unexpected status fetching OSDD: %s", resp.Status) - } - - return parseOpenSearchTemplate(resp.Body) -} - -// parseOpenSearchTemplate parses an OpenSearch Description XML from r and +// ParseOpenSearchTemplate parses an OpenSearch Description XML from r and // returns the preferred Atom/OPDS search template URL. -func parseOpenSearchTemplate(r io.Reader) (string, error) { +func ParseOpenSearchTemplate(r io.Reader) (string, error) { // Stream decode to avoid buffering entire body in memory var d OpenSearchDescription decoder := xml.NewDecoder(r) @@ -56,18 +33,30 @@ func parseOpenSearchTemplate(r io.Reader) (string, error) { return "", fmt.Errorf("failed to parse OpenSearch description: %w", err) } - // First pass: look for the OPDS profile type + // First pass: look for the OPDS profile type with kind=acquisition + for _, u := range d.Urls { + if u.Type == "application/atom+xml;profile=opds-catalog;kind=acquisition" && u.Template != "" { + return u.Template, nil + } + } + // Second pass: look for the OPDS profile type for _, u := range d.Urls { if u.Type == "application/atom+xml;profile=opds-catalog" && u.Template != "" { return u.Template, nil } } - // Second pass: any atom+xml template + // Third pass: any atom+xml template for _, u := range d.Urls { if u.Type == "application/atom+xml" && u.Template != "" { return u.Template, nil } } + // Fourth pass: any template at all + for _, u := range d.Urls { + if u.Template != "" { + return u.Template, nil + } + } - return "", fmt.Errorf("no suitable Atom template found in OSDD") + return "", fmt.Errorf("no suitable template found in OSDD") } diff --git a/opds/opensearch_test.go b/opds/opensearch_test.go index 7d8fbf1..45f2840 100644 --- a/opds/opensearch_test.go +++ b/opds/opensearch_test.go @@ -12,7 +12,7 @@ func TestParseOpenSearchTemplate_PrefersOPDSProfile(t *testing.T) { ` - tmpl, err := parseOpenSearchTemplate(strings.NewReader(xml)) + tmpl, err := ParseOpenSearchTemplate(strings.NewReader(xml)) if err != nil { t.Fatalf("expected no error, got %v", err) } @@ -28,7 +28,7 @@ func TestParseOpenSearchTemplate_FallbackAtom(t *testing.T) { ` - tmpl, err := parseOpenSearchTemplate(strings.NewReader(xml)) + tmpl, err := ParseOpenSearchTemplate(strings.NewReader(xml)) if err != nil { t.Fatalf("expected no error, got %v", err) } @@ -38,14 +38,47 @@ func TestParseOpenSearchTemplate_FallbackAtom(t *testing.T) { } } -func TestParseOpenSearchTemplate_NoSuitable(t *testing.T) { +func TestParseOpenSearchTemplate_PrefersAcquisition(t *testing.T) { + xml := ` + + + + +` + + tmpl, err := ParseOpenSearchTemplate(strings.NewReader(xml)) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + want := "https://example.test/acquisition?q={searchTerms}" + if tmpl != want { + t.Fatalf("unexpected template. got %q want %q", tmpl, want) + } +} + +func TestParseOpenSearchTemplate_FallbackOther(t *testing.T) { xml := ` ` - _, err := parseOpenSearchTemplate(strings.NewReader(xml)) + tmpl, err := ParseOpenSearchTemplate(strings.NewReader(xml)) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + want := "https://example.test/html?q={searchTerms}" + if tmpl != want { + t.Fatalf("unexpected template. got %q want %q", tmpl, want) + } +} + +func TestParseOpenSearchTemplate_NoSuitable(t *testing.T) { + xml := ` + +` + + _, err := ParseOpenSearchTemplate(strings.NewReader(xml)) if err == nil { t.Fatalf("expected error, got nil") } @@ -58,7 +91,7 @@ func TestParseOpenSearchTemplate_EmptyTemplateIgnored(t *testing.T) { ` - tmpl, err := parseOpenSearchTemplate(strings.NewReader(xml)) + tmpl, err := ParseOpenSearchTemplate(strings.NewReader(xml)) if err != nil { t.Fatalf("expected no error, got %v", err) } @@ -74,7 +107,7 @@ func TestParseOpenSearchTemplate_MalformedXML(t *testing.T) { ` - _, err := parseOpenSearchTemplate(strings.NewReader(xml)) + _, err := ParseOpenSearchTemplate(strings.NewReader(xml)) if err == nil { t.Fatalf("expected XML parse error, got nil") } diff --git a/server.go b/server.go index 128b5e1..b9c0c4a 100644 --- a/server.go +++ b/server.go @@ -63,7 +63,7 @@ func NewServer(configData *ProxyConfig) (*Server, error) { // Feed adapted := make([]auth.FeedConfig, len(configData.Feeds)) for i, f := range configData.Feeds { - adapted[i] = auth.FeedConfig{Name: f.Name, Url: f.Url, Auth: toAuthPtr(f.Auth)} + adapted[i] = auth.FeedConfig{Name: f.Name, Url: f.Url, UserAgent: f.UserAgent, Auth: toAuthPtr(f.Auth)} } router.Handle("GET /feed", requestMiddleware(debounceMiddleware(handlers.Feed("tmp/", adapted, s, configData.DebugMode))))