|
| 1 | +from sprag import Module, debounce, dom |
| 2 | + |
| 3 | + |
class SearchModule(Module):
    # Client-side search over a JSON index fetched at start-up.
    #
    # The module looks up three elements inside ``self.element`` by their
    # ``data-role`` attribute: ``search-input``, ``search-status`` and
    # ``search-results``.  Every lookup is optional: when an element is not
    # found, the step that needs it is skipped.

    def __init__(self, screen=None, state=None):
        # ``state`` falls back to a new dict when the caller passes nothing.
        super().__init__(screen=screen, state=state or {})
        self._docs = []  # prepared index entries, filled by load_index()
        self._loaded = False  # set once the index was fetched and prepared

    def on_start(self):
        # Seed the input from the ``q`` URL parameter, subscribe to input
        # events and kick off the index download.
        initial = self._read_query_param()
        input_el = dom.query("[data-role='search-input']", self.element)
        if input_el and initial:
            input_el.value = initial
        self.delegate(self.element, "input", "[data-role='search-input']", self.on_input)
        self._set_status("Loading search index…")
        # NOTE(review): load_index is declared ``async`` and is not awaited
        # here; this presumably relies on the runtime starting the call
        # eagerly -- confirm against the sprag runtime.
        self.load_index(initial)

    async def load_index(self, initial_query):
        # Fetch and prepare the index, then run the first search.
        #
        # initial_query: query to run when no search input element exists.
        #
        # Any exception raised while fetching, parsing, preparing or running
        # the first search clears the result list and shows an error status.
        try:
            response = await browser.fetch("../static/search-index.json")
            payload = await response.json()
            self._docs = self._prepare(payload["docs"])
            self._loaded = True
            # _run() is a no-op until _loaded is set, so anything typed while
            # the index was downloading has not been searched yet.  Use what
            # is in the input now instead of the value captured at start-up.
            self._run(self._current_query(initial_query))
        except Exception:
            self._render_results([], [])
            self._set_status("Couldn't load the search index.")

    def _current_query(self, fallback):
        # Return the search input's current value, or ``fallback`` when the
        # input element is not present.
        input_el = dom.query("[data-role='search-input']", self.element)
        if input_el:
            return input_el.value
        return fallback

    # NOTE(review): 0.12 is presumably a delay in seconds -- confirm against
    # the ``debounce`` implementation.
    @debounce(0.12)
    def on_input(self, event, target):
        # Delegated ``input`` handler: search for the field's current value.
        self._run(target.value)

    def _run(self, query):
        # Execute a search for ``query`` and update result list and status.
        # Does nothing until the index has been loaded.
        if not self._loaded:
            return None
        trimmed = query.strip()
        # A blank query produces no tokens, so a single check covers both
        # "nothing typed" and "only separators typed".
        tokens = self._tokenize(trimmed.lower())
        if len(tokens) == 0:
            self._render_results([], [])
            self._set_status("Type to search the docs.")
            return None
        results = self._score(tokens)
        count = len(results)
        if count == 0:
            self._render_results([], [])
            self._set_status("No results for “" + trimmed + "”.")
            return None
        self._render_results(results, tokens)
        plural = "" if count == 1 else "s"
        self._set_status(str(count) + " result" + plural + " for “" + trimmed + "”")

    def _set_status(self, text):
        # Write ``text`` into the status element, if there is one.
        el = dom.query("[data-role='search-status']", self.element)
        if el:
            el.textContent = text

    def _render_results(self, items, tokens):
        # Replace the result list with one ``<li>`` per item.
        #
        # items:  dicts with "url", "section", "title" and "snippet" keys,
        #         as built by _score().
        # tokens: lowercase query tokens to highlight in title and snippet.
        container = dom.query("[data-role='search-results']", self.element)
        if not container:
            return None
        dom.clear(container)
        doc = browser.document
        for item in items:
            li = doc.createElement("li")
            li.className = "search-result"
            a = doc.createElement("a")
            a.href = item["url"]
            a.className = "search-result-link"
            section = doc.createElement("div")
            section.className = "search-result-section"
            section.textContent = item["section"]
            a.appendChild(section)
            title = doc.createElement("div")
            title.className = "search-result-title"
            self._highlight(title, item["title"], tokens)
            a.appendChild(title)
            # The snippet row is left out entirely when there is no snippet.
            if item["snippet"]:
                snippet = doc.createElement("div")
                snippet.className = "search-result-snippet"
                self._highlight(snippet, item["snippet"], tokens)
                a.appendChild(snippet)
            li.appendChild(a)
            container.appendChild(li)

    def _highlight(self, parent, text, tokens):
        # Append ``text`` to ``parent``, wrapping every token occurrence in a
        # ``<mark>`` element.  Content is added as text nodes / textContent
        # only.  Nothing is appended when ``text`` is empty.
        if not text:
            return None
        text_lc = text.lower()
        n = len(text)
        # Matching is done on the lowercased copy but slices are taken from
        # the original.  Lowercasing can change the length of some strings,
        # which would shift every offset, so in that case the text is shown
        # without highlights rather than with misplaced ones.
        if len(tokens) == 0 or len(text_lc) != n:
            parent.textContent = text
            return None
        doc = browser.document
        i = 0
        plain_start = 0  # start of the not-yet-emitted unhighlighted run
        while i < n:
            # Take the longest token matching at ``i`` so that a short token
            # which is a prefix of a longer one does not cut the mark short.
            matched_len = 0
            for token in tokens:
                tlen = len(token)
                if tlen > matched_len and text_lc.slice(i, i + tlen) == token:
                    matched_len = tlen
            if matched_len > 0:
                if i > plain_start:
                    parent.appendChild(doc.createTextNode(text.slice(plain_start, i)))
                mark = doc.createElement("mark")
                mark.textContent = text.slice(i, i + matched_len)
                parent.appendChild(mark)
                i = i + matched_len
                plain_start = i
            else:
                i = i + 1
        if plain_start < n:
            parent.appendChild(doc.createTextNode(text.slice(plain_start, n)))

    def _tokenize(self, query_lc):
        # Split an already-lowercased query on single spaces and drop blank
        # pieces.  Returns the list of remaining words.
        tokens = []
        for word in query_lc.split(" "):
            cleaned = word.strip()
            if cleaned:
                tokens.push(cleaned)
        return tokens

    def _token_weight(self, doc, token):
        # Weight of ``token`` for ``doc``: only the best-ranked field that
        # contains it counts.  Returns 0 when no field contains the token.
        if token in doc["title_lc"]:
            return 10
        if token in doc["headings_lc"]:
            return 4
        if token in doc["description_lc"]:
            return 3
        if token in doc["body_lc"]:
            return 1
        return 0

    def _score(self, tokens):
        # Return up to 30 result dicts for the documents that contain every
        # token, ordered by the comparator below (higher score first).
        matches = []
        for doc in self._docs:
            score = 0
            all_hit = True
            for token in tokens:
                weight = self._token_weight(doc, token)
                if weight == 0:
                    # Every token must hit; one miss rejects the document.
                    all_hit = False
                    break
                score += weight
            if all_hit and score > 0:
                matches.push({
                    "title": doc["title"],
                    "url": doc["url"],
                    "section": doc["section"],
                    "snippet": self._snippet(doc, tokens),
                    "score": score,
                })

        # Comparator form of sort: a positive result places ``b`` first.
        matches.sort(lambda a, b: b["score"] - a["score"])
        return matches.slice(0, 30)

    def _snippet(self, doc, tokens):
        # Build a short excerpt of the document body around the first token
        # (in query order) that occurs in it.  Falls back to the description
        # when the body is empty, and to the start of the body when no token
        # occurs in it.
        body = doc["body"]
        body_lc = doc["body_lc"]
        if not body:
            return doc["description"] or ""

        pos = -1
        for token in tokens:
            i = body_lc.indexOf(token)
            if i >= 0:
                pos = i
                break

        if pos < 0:
            tail = "…" if len(body) > 160 else ""
            return body.slice(0, 160) + tail

        # Window of up to 60 characters before the hit and 120 from it on;
        # an ellipsis marks each side that was cut.
        start = pos - 60
        if start < 0:
            start = 0
        end = pos + 120
        prefix = "…" if start > 0 else ""
        suffix = "…" if end < len(body) else ""
        return prefix + body.slice(start, end) + suffix

    def _prepare(self, docs):
        # Convert raw index entries into the records used by _score() and
        # _snippet(), precomputing lowercase copies of the searched fields.
        #
        # Empty/null text fields are normalised to "" (and headings to an
        # empty list) so that one incomplete entry does not abort loading
        # the whole index.
        prepared = []
        for d in docs:
            title = d["title"] or ""
            description = d["description"] or ""
            body = d["body"] or ""
            headings = d["headings"] or []
            prepared.push({
                "title": title,
                "title_lc": title.lower(),
                "url": d["url"],
                "section": d["section"],
                "description": description,
                "description_lc": description.lower(),
                "headings_lc": headings.join(" ").lower(),
                "body": body,
                "body_lc": body.lower(),
            })
        return prepared

    def _read_query_param(self):
        # Return the decoded value of the first ``q`` parameter in the page
        # URL's query string, or "" when there is none.
        raw = browser.location.search
        if not raw or len(raw) < 3:
            return ""
        # Drop the first character of ``location.search`` before splitting.
        for pair in raw.slice(1).split("&"):
            if pair.slice(0, 2) == "q=":
                value = pair.slice(2).replace_all("+", " ")
                try:
                    return browser.decodeURIComponent(value)
                except Exception:
                    # A malformed percent-escape makes decoding fail.  This
                    # runs during on_start(), so fall back to the undecoded
                    # text instead of letting start-up abort.
                    return value
        return ""
0 commit comments