FujoWebDev
diff --git a/‎src/page-loaders.ts‎
Lines changed: 15 additions & 1 deletion b/‎src/page-loaders.ts‎
Lines changed: 15 additions & 1 deletion
diff --git a/‎src/tags/index.ts‎
Lines changed: 41 additions & 1 deletion b/‎src/tags/index.ts‎
Lines changed: 41 additions & 1 deletion
diff --git a/‎src/tags/search-getters.ts‎
Lines changed: 67 additions & 0 deletions b/‎src/tags/search-getters.ts‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎src/urls.ts‎
Lines changed: 44 additions & 1 deletion b/‎src/urls.ts‎
Lines changed: 44 additions & 1 deletion
@@ -1,6 +1,7 @@
 import {
   getSeriesUrl,
   getTagUrl,
+  getSearchUrlFromTagFilters,
   getTagWorksFeedAtomUrl,
   getTagWorksFeedUrl,
   getUserProfileUrl,
@@ -11,7 +12,7 @@ import {
 import { CheerioAPI } from "cheerio";
 import { load } from "cheerio/slim";
 import { getFetcher } from "./fetcher";
-import { ArchiveId } from "types/entities";
+import { ArchiveId, TagSearchFilters } from "types/entities";
 
 // This is a wrapper around the fetch function that loads the page into a CheerioAPI
 // instance and returns the type of the page.
@@ -61,6 +62,19 @@ export const loadTagPage = async ({ tagName }: { tagName: string }) => {
   });
 };
 
+// A CheerioAPI instance tagged with the "TagSearchPage" kind, as returned by
+// loadTagSearchPage.
+export interface TagSearchPage extends CheerioAPI {
+  kind: "TagSearchPage";
+}
+/**
+ * Fetches the tag search page whose URL is derived from the given filters.
+ *
+ * @param tagSearchFilters The filters used to build the search URL.
+ * @returns The fetched page, typed as a TagSearchPage.
+ */
+export const loadTagSearchPage = async ({
+  tagSearchFilters,
+}: {
+  tagSearchFilters: TagSearchFilters;
+}) => {
+  const url = getSearchUrlFromTagFilters(tagSearchFilters);
+  return await fetchPage<TagSearchPage>({ url });
+};
+
 // Atom feed of the most recent works featuring a tag.
 // Sample: https://archiveofourown.org/tags/91247110/feed.atom
 export interface TagWorksAtomFeed extends CheerioAPI {
 
@@ -12,10 +12,49 @@ import { getTagId, getTagNameFromFeed } from "./works-feed-getters";
 import {
   loadTagFeedAtomPage,
   loadTagPage,
+  loadTagSearchPage,
   loadTagWorksFeed,
 } from "src/page-loaders";
 
-import type { Tag } from "types/entities";
+import type {
+  Tag,
+  TagSearchFilters,
+  TagSearchResultSummary,
+} from "types/entities";
+import {
+  getPagesCount,
+  getTagsSearchResults,
+  getTotalResults,
+} from "./search-getters";
+
+/**
+ * Runs a tag search and summarizes the results page.
+ *
+ * @param tagSearchFilters Any subset of the search filters; every filter left
+ * unset (null or undefined) falls back to its default.
+ * @returns The filters actually used, the total result count, the total and
+ * current page numbers, and the tags found on the loaded page.
+ */
+export const searchTags = async (
+  tagSearchFilters: Partial<TagSearchFilters>
+): Promise<TagSearchResultSummary> => {
+  const {
+    tagName,
+    fandoms,
+    type,
+    wranglingStatus,
+    sortColumn,
+    sortDirection,
+    page,
+  } = tagSearchFilters;
+
+  // Fill in a default for each filter so that every required property is set.
+  const filters: TagSearchFilters = {
+    tagName: tagName ?? null,
+    fandoms: fandoms ?? [],
+    type: type ?? "any",
+    wranglingStatus: wranglingStatus ?? "any",
+    sortColumn: sortColumn ?? "name",
+    sortDirection: sortDirection ?? "asc",
+    page: page ?? 1,
+  };
+
+  const searchPage = await loadTagSearchPage({ tagSearchFilters: filters });
+
+  const totalResults = getTotalResults(searchPage);
+  const totalPages = getPagesCount(searchPage);
+  const tags = getTagsSearchResults(searchPage);
+
+  // The normalized filters are returned unchanged alongside the results.
+  return {
+    filters,
+    totalResults,
+    pages: {
+      total: totalPages,
+      current: filters.page,
+    },
+    tags,
+  };
+};
 
 export const getTag = async ({
   tagName,
@@ -43,6 +82,7 @@ export const getTag = async ({
   };
 };
 
+// TODO: this is really getCanonicalTagNameById
+// Loads the tag's Atom feed page by id and returns the name that
+// getTagNameFromFeed reads from it.
 export const getTagNameById = async ({ tagId }: { tagId: string }) => {
   return getTagNameFromFeed(await loadTagFeedAtomPage({ tagId }));
 };
@@ -0,0 +1,67 @@
+import { TagSearchPage } from "src/page-loaders";
+import { TagSearchResultSummary } from "types/entities";
+
+/**
+ * Parses the run of decimal digits at the start of the trimmed text.
+ *
+ * @param text The text to parse; anything after the leading digits is ignored.
+ * @returns The leading digits as a base-10 integer.
+ * @throws Error if the trimmed text does not start with a digit.
+ */
+const parseIntOrThrow = (text: string) => {
+  const leadingDigits = /^(\d+)/.exec(text.trim());
+  if (leadingDigits === null) {
+    throw new Error(`Invalid integer: ${text}`);
+  }
+  return Number.parseInt(leadingDigits[1], 10);
+};
+
+/**
+ * Reads the total number of results from the first `h3.heading` on the page.
+ *
+ * The heading is expected to contain "<count> Found". The count may carry
+ * thousands separators (e.g. "1,234 Found"); they are stripped before parsing
+ * so that the whole number is returned rather than only its last digit group.
+ *
+ * @param page The loaded tag search page.
+ * @returns The parsed count, or 0 when the heading has no "<count> Found".
+ */
+export const getTotalResults = (page: TagSearchPage) => {
+  const headingText = page("h3.heading").first().text();
+  const totalResultsMatch = headingText.match(/(\d[\d,]*)\s+Found/);
+  if (!totalResultsMatch) {
+    return 0;
+  }
+  return Number.parseInt(totalResultsMatch[1].replace(/,/g, ""), 10);
+};
+
+/**
+ * Reads the number of result pages from the pagination list.
+ *
+ * Uses the text of the last `.pagination.actions` item that is neither the
+ * "next" nor the "previous" control.
+ *
+ * @param page The loaded tag search page.
+ * @returns The parsed page number, or 0 when that text is empty.
+ * @throws Error if the text is non-empty but does not start with a digit.
+ */
+export const getPagesCount = (page: TagSearchPage) => {
+  const pageItems = page(".pagination.actions li:not(.next, .previous)");
+  const lastPageLabel = pageItems.last().text();
+  if (!lastPageLabel) {
+    return 0;
+  }
+  return parseIntOrThrow(lastPageLabel);
+};
+
+/**
+ * Extracts the tags listed on a tag search page.
+ *
+ * Each `ol.tag.index.group > li` entry is expected to read
+ * "Type: Name (Works Count)". Entries without an `a.tag` link are skipped.
+ *
+ * @param page The loaded tag search page.
+ * @returns One object per listed tag with its name, type, canonical flag and
+ * works count.
+ * @throws Error if an entry with a tag link lacks the trailing works count or
+ * the leading type.
+ */
+export const getTagsSearchResults = (page: TagSearchPage) => {
+  return page("ol.tag.index.group > li")
+    .map((_, li) => {
+      const $li = page(li);
+      const link = $li.find("a.tag").first();
+      if (!link.length) {
+        return null;
+      }
+
+      const name = link.text().trim();
+      const entryText = $li.text();
+
+      // Tags are in the format: "Type: Name (Works Count)"
+      // Here we extract the works count. Thousands separators are accepted
+      // and stripped, so "(1,234)" is read as 1234.
+      const worksMatch = entryText.match(/\((\d[\d,]*)\)\s*$/);
+      if (!worksMatch) {
+        // Fail with the offending text instead of a null-dereference error.
+        throw new Error(`Invalid tag works count: ${entryText}`);
+      }
+      const worksCount = parseIntOrThrow(worksMatch[1].replace(/,/g, ""));
+
+      // Tags are in the format: "Type: Name (Works Count)"
+      // Here we extract the type.
+      const typeMatch = entryText.match(/^([^:]+):/);
+      if (!typeMatch) {
+        throw new Error(`Invalid tag type: ${entryText}`);
+      }
+      const type = typeMatch[1].trim().toLowerCase();
+
+      // The class list of the first span in the entry tells us whether the
+      // tag is canonical.
+      const classes = new Set(
+        ($li.find("span").attr("class") ?? "").split(/\s+/).filter(Boolean)
+      );
+
+      return {
+        name,
+        type:
+          type === "unsortedtag"
+            ? "unsorted"
+            : (type as TagSearchResultSummary["tags"][number]["type"]),
+        canonical: classes.has("canonical"),
+        worksCount,
+      } as const;
+    })
+    .get()
+    .filter((tag) => tag !== null);
+};
@@ -3,7 +3,7 @@ import {
   isValidArchiveIdOrNullish,
   parseArchiveId,
 } from "./utils";
-import { WorkSummary } from "types/entities";
+import { TagSearchFilters, WorkSummary } from "types/entities";
 
 declare global {
   var archiveBaseUrl: string;
@@ -195,3 +195,46 @@ export const getWorkDetailsFromUrl = ({
     collectionName: url.match(/collections\/(\w+)/)?.[1],
   };
 };
+
+/**
+ * Builds the query parameters for a tag search from the given filters.
+ *
+ * @param searchFilters Any subset of the tag search filters.
+ * @returns URLSearchParams holding "page" (only when a truthy page is given),
+ * "commit", and one "tag_search[...]" entry per search field.
+ */
+const getSearchParamsFromTagFilters = (
+  searchFilters: Partial<TagSearchFilters>
+) => {
+  const {
+    tagName,
+    fandoms,
+    type,
+    wranglingStatus,
+    sortColumn,
+    sortDirection,
+    page,
+  } = searchFilters;
+
+  // Our wrangling status values contain "_or_" and "_and_" for readability;
+  // they are collapsed to "_" so the values match what the API expects.
+  const wranglingStatusParam =
+    wranglingStatus?.replaceAll("_or_", "_").replaceAll("_and_", "_") ?? "any";
+
+  // The "works_count" sort column is sent as "uses".
+  const sortColumnParam =
+    sortColumn === "works_count" ? "uses" : sortColumn ?? "name";
+
+  // The search fields are listed under their bare names first, which is more
+  // readable than repeating the "tag_search[]" wrapper on each one.
+  const tagSearchFields: [string, string][] = [
+    ["name", tagName ?? ""],
+    ["fandoms", fandoms?.join(",") ?? ""],
+    ["type", type?.toLowerCase() ?? ""],
+    ["wrangling_status", wranglingStatusParam],
+    ["sort_column", sortColumnParam],
+    ["sort_direction", sortDirection ?? "asc"],
+  ];
+
+  const searchParams = new URLSearchParams();
+  if (page) {
+    searchParams.set("page", String(page));
+  }
+  searchParams.set("commit", "Search Tags");
+
+  // Now add each field to the search params, wrapped with "tag_search[]".
+  for (const [field, value] of tagSearchFields) {
+    searchParams.set(`tag_search[${field}]`, value);
+  }
+
+  return searchParams;
+};
+
+/**
+ * Builds the full URL of the tag search page for the given filters.
+ *
+ * @param searchFilters The tag search filters to encode in the query string.
+ * @returns The "tags/search" URL, resolved against the archive base URL, with
+ * the filters as its query string.
+ */
+export const getSearchUrlFromTagFilters = (searchFilters: TagSearchFilters) => {
+  const searchUrl = new URL("tags/search", getArchiveBaseUrl());
+  const queryString = getSearchParamsFromTagFilters(searchFilters).toString();
+  searchUrl.search = queryString;
+  return searchUrl.href;
+};