-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmemory_graph_terms.py
More file actions
45 lines (39 loc) · 951 Bytes
/
Copy pathmemory_graph_terms.py
File metadata and controls
45 lines (39 loc) · 951 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""Shared lexical hints for personal memory-graph routing and retrieval."""
import re
# Single-word, lowercase hints. contains_personal_document_term() tests these
# against whole tokens of the input text, so every entry must itself be a
# single lowercase alphanumeric token to ever match.
PERSONAL_DOCUMENT_TERMS = frozenset(
    (
        # Generic document / record nouns.
        "doc", "docs", "document", "documents",
        "file", "files", "paperwork",
        "record", "records",
        "report", "reports",
        "result", "results",
        # Prescriptions and specifications.
        "prescription", "prescriptions", "rx",
        "spec", "specs", "specification", "specifications",
        # Lab / medical wording.
        "lab", "labs", "medical",
        # Eyewear wording.
        "glasses", "eyeglasses", "lens", "lenses",
        # Identity wording.
        "id", "identity",
    )
)
def extract_normalized_tokens(text: str) -> set[str]:
    """Return the distinct lowercase alphanumeric tokens found in *text*.

    The input is lowercased first, then every maximal run of the characters
    ``a-z`` and ``0-9`` becomes one token; any other character acts as a
    separator. A falsy *text* (``None`` or ``""``) yields an empty set.
    """
    lowered = (text or "").lower()
    return {match.group(0) for match in re.finditer(r"[a-z0-9]+", lowered)}
def contains_personal_document_term(text: str) -> bool:
    """Return True if *text* contains any personal-document hint as a whole token.

    Matching is done on the tokens produced by ``extract_normalized_tokens``,
    so a hint only counts when it appears as a complete token, not as a
    substring of a longer word.
    """
    # Tokenize once; the previous form re-tokenized the text for every term.
    tokens = extract_normalized_tokens(text)
    return not PERSONAL_DOCUMENT_TERMS.isdisjoint(tokens)