Skip to content

Commit a8f021f

Browse files
Add search tags functionality (#128)
* Prepare for tag search tests * tag- * fix up cases for search folders * make all the tag search folder paths lowercase
1 parent 1a054b7 commit a8f021f

15 files changed

Lines changed: 3085 additions & 3 deletions

File tree

  • src
  • tests
    • mocks
      • handlers/tags
      • scripts
  • types

src/page-loaders.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import {
22
getSeriesUrl,
33
getTagUrl,
4+
getSearchUrlFromTagFilters,
45
getTagWorksFeedAtomUrl,
56
getTagWorksFeedUrl,
67
getUserProfileUrl,
@@ -11,7 +12,7 @@ import {
1112
import { CheerioAPI } from "cheerio";
1213
import { load } from "cheerio/slim";
1314
import { getFetcher } from "./fetcher";
14-
import { ArchiveId } from "types/entities";
15+
import { ArchiveId, TagSearchFilters } from "types/entities";
1516

1617
// This is a wrapper around the fetch function that loads the page into a CheerioAPI
1718
// instance and returns the type of the page.
@@ -61,6 +62,19 @@ export const loadTagPage = async ({ tagName }: { tagName: string }) => {
6162
});
6263
};
6364

65+
/**
 * Cheerio handle for a loaded tag search results page. The literal `kind`
 * property distinguishes it at the type level from other loaded pages.
 */
export interface TagSearchPage extends CheerioAPI {
  kind: "TagSearchPage";
}

/**
 * Loads the tag search results page matching the given filters.
 *
 * @param tagSearchFilters - Filters used to build the search URL.
 * @returns Whatever `fetchPage` resolves to for the computed URL, typed as a
 *   `TagSearchPage`.
 */
export const loadTagSearchPage = async ({
  tagSearchFilters,
}: {
  tagSearchFilters: TagSearchFilters;
}) => {
  const url = getSearchUrlFromTagFilters(tagSearchFilters);
  return await fetchPage<TagSearchPage>({ url });
};
77+
6478
// Atom feed of the most recent works featuring a tag.
6579
// Sample: https://archiveofourown.org/tags/91247110/feed.atom
6680
export interface TagWorksAtomFeed extends CheerioAPI {

src/tags/index.ts

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,49 @@ import { getTagId, getTagNameFromFeed } from "./works-feed-getters";
1212
import {
1313
loadTagFeedAtomPage,
1414
loadTagPage,
15+
loadTagSearchPage,
1516
loadTagWorksFeed,
1617
} from "src/page-loaders";
1718

18-
import type { Tag } from "types/entities";
19+
import type {
20+
Tag,
21+
TagSearchFilters,
22+
TagSearchResultSummary,
23+
} from "types/entities";
24+
import {
25+
getPagesCount,
26+
getTagsSearchResults,
27+
getTotalResults,
28+
} from "./search-getters";
29+
30+
/**
 * Runs a tag search and summarizes the first page of matching results.
 *
 * Any filter the caller leaves out (or sets to a nullish value) falls back to
 * a default: no tag name, no fandoms, "any" type and wrangling status,
 * ascending sort by name, and page 1.
 *
 * @param tagSearchFilters - The subset of filters the caller wants to set.
 * @returns The filters actually used, the total result count, pagination
 *   information, and the tags found on the requested page.
 */
export const searchTags = async (
  tagSearchFilters: Partial<TagSearchFilters>
): Promise<TagSearchResultSummary> => {
  const {
    tagName,
    fandoms,
    type,
    wranglingStatus,
    sortColumn,
    sortDirection,
    page: requestedPage,
  } = tagSearchFilters;

  // Fill in every missing filter so downstream code always sees a complete set.
  const filters: TagSearchFilters = {
    tagName: tagName ?? null,
    fandoms: fandoms ?? [],
    type: type ?? "any",
    wranglingStatus: wranglingStatus ?? "any",
    sortColumn: sortColumn ?? "name",
    sortDirection: sortDirection ?? "asc",
    page: requestedPage ?? 1,
  };

  const searchPage = await loadTagSearchPage({ tagSearchFilters: filters });

  const totalResults = getTotalResults(searchPage);
  const totalPages = getPagesCount(searchPage);
  const tags = getTagsSearchResults(searchPage);

  // The completed filters are echoed back unchanged so callers can see which
  // defaults were applied.
  return {
    filters,
    totalResults,
    pages: {
      total: totalPages,
      current: filters.page,
    },
    tags,
  };
};
1958

2059
export const getTag = async ({
2160
tagName,
@@ -43,6 +82,7 @@ export const getTag = async ({
4382
};
4483
};
4584

85+
// TODO: this is really getCanonicalTagNameById
4686
/**
 * Resolves a tag's name from its id by loading the tag's Atom feed page and
 * reading the name out of it.
 *
 * @param tagId - The id of the tag to look up.
 * @returns The tag name extracted from the feed.
 */
export const getTagNameById = async ({ tagId }: { tagId: string }) => {
  const feedPage = await loadTagFeedAtomPage({ tagId });
  return getTagNameFromFeed(feedPage);
};

src/tags/search-getters.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import { TagSearchPage } from "src/page-loaders";
2+
import { TagSearchResultSummary } from "types/entities";
3+
4+
/**
 * Parses the non-negative integer found at the start of `text`.
 *
 * Surrounding whitespace is ignored and anything after the number is
 * discarded. A number written with thousands separators ("1,234") is read as
 * a whole; a comma that is not followed by exactly three digits ends the
 * number instead.
 *
 * @param text - Text expected to begin with a run of digits.
 * @returns The parsed integer.
 * @throws Error if `text` does not start with a digit once trimmed.
 */
const parseIntOrThrow = (text: string) => {
  // First alternative: properly grouped digits ("12,345,678").
  // Second alternative: a plain run of digits.
  const leadingNumber = /^(\d{1,3}(?:,\d{3})+(?!\d)|\d+)/.exec(
    text.trim()
  )?.[1];
  if (leadingNumber === undefined) {
    throw new Error(`Invalid integer: ${text}`);
  }
  return parseInt(leadingNumber.replace(/,/g, ""), 10);
};
11+
12+
/**
 * Reads the total number of search results from the page's first
 * `h3.heading`, which is expected to contain text such as "57 Found".
 *
 * The count may be rendered with thousands separators (e.g. "1,234 Found");
 * the separators are stripped so the full number is returned rather than
 * only its last digit group.
 *
 * @param page - The loaded tag search results page.
 * @returns The total number of results, or 0 when the heading carries no
 *   "<number> Found" text.
 */
export const getTotalResults = (page: TagSearchPage) => {
  const headingText = page("h3.heading").first().text();
  const countText = /(\d[\d,]*)\s+Found/.exec(headingText)?.[1];
  if (countText === undefined) {
    return 0;
  }
  // The capture is digits and commas only, so parseInt cannot yield NaN here.
  return parseInt(countText.replace(/,/g, ""), 10);
};
19+
20+
/**
 * Determines how many pages of results the search produced by reading the
 * last pagination entry that is neither the "next" nor the "previous" control.
 *
 * @param page - The loaded tag search results page.
 * @returns The number shown in the last such pagination entry, or 0 when
 *   that entry is missing or has no text.
 * @throws Error if the entry's text does not start with a number.
 */
export const getPagesCount = (page: TagSearchPage) => {
  const pageEntries = page(".pagination.actions li:not(.next, .previous)");
  const lastEntryText = pageEntries.last().text();
  if (!lastEntryText) {
    return 0;
  }
  return parseIntOrThrow(lastEntryText);
};
26+
27+
/**
 * Extracts the tags listed on a tag search results page.
 *
 * Each result item is expected to read "Type: Name (Works Count)". Items
 * without an `a.tag` link are skipped.
 *
 * @param page - The loaded tag search results page.
 * @returns One entry per listed tag with its name, lowercased type
 *   ("unsortedtag" is reported as "unsorted"), whether the item's span has
 *   the "canonical" class, and its works count.
 * @throws Error if an item with a tag link lacks a trailing "(count)" or a
 *   leading "Type:" prefix.
 */
export const getTagsSearchResults = (page: TagSearchPage) => {
  return page("ol.tag.index.group > li")
    .map((_, li) => {
      const $li = page(li);
      const link = $li.find("a.tag").first();
      if (!link.length) {
        return null;
      }

      const name = link.text().trim();
      const itemText = $li.text();

      // The works count is the parenthesized number at the very end of the
      // item. Thousands separators are tolerated and stripped before parsing.
      const worksCountText = /\((\d[\d,]*)\)\s*$/.exec(itemText)?.[1];
      if (worksCountText === undefined) {
        throw new Error(`Invalid tag works count: ${itemText}`);
      }
      const worksCount = parseInt(worksCountText.replace(/,/g, ""), 10);

      // The type is everything before the first colon.
      const rawType = /^([^:]+):/.exec(itemText)?.[1];
      if (rawType === undefined) {
        throw new Error(`Invalid tag type: ${itemText}`);
      }
      const type = rawType.trim().toLowerCase();

      const classes = new Set(
        ($li.find("span").attr("class") ?? "").split(/\s+/).filter(Boolean)
      );

      return {
        name,
        type:
          type === "unsortedtag"
            ? "unsorted"
            : (type as TagSearchResultSummary["tags"][number]["type"]),
        canonical: classes.has("canonical"),
        worksCount,
      } as const;
    })
    .get()
    .filter((tag) => tag !== null);
};

src/urls.ts

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import {
33
isValidArchiveIdOrNullish,
44
parseArchiveId,
55
} from "./utils";
6-
import { WorkSummary } from "types/entities";
6+
import { TagSearchFilters, WorkSummary } from "types/entities";
77

88
declare global {
99
var archiveBaseUrl: string;
@@ -195,3 +195,46 @@ export const getWorkDetailsFromUrl = ({
195195
collectionName: url.match(/collections\/(\w+)/)?.[1],
196196
};
197197
};
198+
199+
/**
 * Translates tag search filters into the query parameters of a tag search
 * request.
 *
 * The resulting parameters are, in order: `page` (only when a truthy page is
 * given), `commit`, and then one `tag_search[...]` entry each for name,
 * fandoms, type, wrangling status, sort column and sort direction.
 *
 * @param searchFilters - The filters to encode; missing ones get defaults.
 * @returns The populated `URLSearchParams`.
 */
const getSearchParamsFromTagFilters = (
  searchFilters: Partial<TagSearchFilters>
) => {
  const {
    tagName,
    fandoms,
    type,
    wranglingStatus,
    sortColumn,
    sortDirection,
    page,
  } = searchFilters;

  // Our wrangling status values carry "_or_" / "_and_" for readability; they
  // are collapsed to a single underscore to get the value sent in the query.
  const wranglingStatusValue =
    wranglingStatus?.replaceAll("_or_", "_").replaceAll("_and_", "_") ?? "any";

  // "works_count" is sent as "uses"; any other column is sent unchanged.
  let sortColumnValue;
  if (sortColumn === "works_count") {
    sortColumnValue = "uses";
  } else {
    sortColumnValue = sortColumn ?? "name";
  }

  const query = new URLSearchParams();
  if (page) {
    query.set("page", String(page));
  }
  query.set("commit", "Search Tags");

  // Listed as bare field names for readability; each one is wrapped in
  // "tag_search[]" when it is added to the query below.
  const fields = [
    ["name", tagName ?? ""],
    ["fandoms", fandoms?.join(",") ?? ""],
    ["type", type?.toLowerCase() ?? ""],
    ["wrangling_status", wranglingStatusValue],
    ["sort_column", sortColumnValue],
    ["sort_direction", sortDirection ?? "asc"],
  ] as const;
  fields.forEach(([field, value]) => {
    query.set(`tag_search[${field}]`, value);
  });

  return query;
};
235+
236+
/**
 * Builds the absolute tag search URL for the given filters.
 *
 * @param searchFilters - The filters to encode into the query string.
 * @returns The `tags/search` URL, resolved against the archive base URL, with
 *   the filters as its query string.
 */
export const getSearchUrlFromTagFilters = (searchFilters: TagSearchFilters) => {
  const searchUrl = new URL("tags/search", getArchiveBaseUrl());
  const query = getSearchParamsFromTagFilters(searchFilters);
  searchUrl.search = query.toString();
  return searchUrl.href;
};

0 commit comments

Comments
 (0)