-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsearch_normalization.py
More file actions
31 lines (23 loc) · 883 Bytes
/
Copy pathsearch_normalization.py
File metadata and controls
31 lines (23 loc) · 883 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from __future__ import annotations
import unicodedata
from collections.abc import Iterable
def _strip_accents(text: str) -> str:
normalized = unicodedata.normalize("NFD", text)
return "".join(ch for ch in normalized if unicodedata.category(ch) != "Mn")
def normalize_search_text(text: str | None) -> str:
    """Normalize input for case- and accent-insensitive search.

    Args:
        text: The raw value to normalize. ``None`` is accepted and treated
            as empty; any other non-string value is converted with
            ``str()`` before normalization.

    Returns:
        The text with accents stripped (via ``_strip_accents``), every run
        of whitespace collapsed to a single space, leading and trailing
        whitespace removed, and the result case-folded. Returns ``""``
        when *text* is ``None``.
    """
    if text is None:
        return ""
    cleaned = _strip_accents(str(text))
    # split() with no argument splits on any whitespace run and drops
    # leading/trailing whitespace, so re-joining yields single spaces.
    compact = " ".join(cleaned.split())
    # casefold() is more aggressive than lower(), e.g. it folds "ß" to "ss".
    return compact.casefold()
def normalize_search_list(values: Iterable[str] | None) -> list[str]:
    """Normalize a collection of search terms, dropping blanks and duplicates.

    Args:
        values: The raw terms. ``None`` or any other falsy value (such as
            an empty list) yields an empty list. A bare ``str`` is treated
            as one term rather than as an iterable of characters.

    Returns:
        The terms after ``normalize_search_text``, in first-seen order,
        with empty results and repeated results removed.
    """
    if not values:
        return []
    if isinstance(values, str):
        # A str is itself an Iterable[str]; iterating it would silently turn
        # "new york" into one entry per character instead of one term.
        values = [values]
    # dict keys are unique and keep insertion order, which gives
    # order-preserving de-duplication in a single pass.
    unique = dict.fromkeys(normalize_search_text(value) for value in values)
    # Terms that normalize to the empty string are not searchable.
    unique.pop("", None)
    return list(unique)