|
22 | 22 |
|
23 | 23 | import spacy |
24 | 24 |
|
25 | | -from axl.models import Operation, TagType, V3Packet |
26 | 25 | from axl.emitter import emit_v3 |
27 | | - |
| 26 | +from axl.models import Operation, TagType, V3Packet |
28 | 27 |
|
29 | 28 | # -- spaCy singleton --------------------------------------------------------- |
30 | 29 |
|
@@ -503,7 +502,9 @@ def extract_evidence(text: str, doc=None) -> Optional[str]: |
503 | 502 |
|
504 | 503 | def _is_pronoun_like(text: str) -> bool: |
505 | 504 | stripped = text.strip().lower() |
506 | | - return stripped in _PRONOUNS or bool(re.fullmatch(r"(?:i|you|we|they|he|she|it|this|that)", stripped)) |
| 505 | + return stripped in _PRONOUNS or bool( |
| 506 | + re.fullmatch(r"(?:i|you|we|they|he|she|it|this|that)", stripped) |
| 507 | + ) |
507 | 508 |
|
508 | 509 |
|
509 | 510 | def _is_weak_subject(text: str) -> bool: |
@@ -630,7 +631,9 @@ def extract_subject( |
630 | 631 | # Fallback tokens. |
631 | 632 | if not best: |
632 | 633 | for tok in doc: |
633 | | - if tok.pos_ in ("PROPN", "NOUN") and not tok.like_num and not _is_pronoun_like(tok.text): |
| 634 | + if (tok.pos_ in ("PROPN", "NOUN") |
| 635 | + and not tok.like_num |
| 636 | + and not _is_pronoun_like(tok.text)): |
634 | 637 | tag, _ = classify_tag(tok.text) |
635 | 638 | clean = _clean_token_value(tok.text, max_len=30, keep=".-") |
636 | 639 | if clean: |
@@ -683,7 +686,9 @@ def infer_role_label(ent, doc) -> Optional[str]: |
683 | 686 | candidates.append((score, tok.text)) |
684 | 687 |
|
685 | 688 | head = ent.root.head |
686 | | - if head.pos_ in ("NOUN", "PROPN") and head.lemma_.lower() not in _ROLE_STOPWORDS and not head.like_num: |
| 689 | + if (head.pos_ in ("NOUN", "PROPN") |
| 690 | + and head.lemma_.lower() not in _ROLE_STOPWORDS |
| 691 | + and not head.like_num): |
687 | 692 | candidates.append((28, head.text)) |
688 | 693 |
|
689 | 694 | if not candidates: |
@@ -829,9 +834,14 @@ def english_to_v3(text: str, agent_id: str = "COMPRESS") -> list[V3Packet]: |
829 | 834 | operation = classify_operation(sent_text) |
830 | 835 |
|
831 | 836 | has_nums = bool( |
832 | | - sent_doc.ents and any(e.label_ in ("MONEY", "QUANTITY", "CARDINAL", "PERCENT", "DATE") for e in sent_doc.ents) |
| 837 | + sent_doc.ents and any( |
| 838 | + e.label_ in ("MONEY", "QUANTITY", "CARDINAL", "PERCENT", "DATE") |
| 839 | + for e in sent_doc.ents |
| 840 | + ) |
| 841 | + ) |
| 842 | + has_ents = bool( |
| 843 | + sent_doc.ents and any(e.label_ in ("PERSON", "ORG", "GPE") for e in sent_doc.ents) |
833 | 844 | ) |
834 | | - has_ents = bool(sent_doc.ents and any(e.label_ in ("PERSON", "ORG", "GPE") for e in sent_doc.ents)) |
835 | 845 | confidence = score_confidence(sent_text, operation, has_nums, has_ents) |
836 | 846 |
|
837 | 847 | tag_type, subject_value = extract_subject(sent_doc, sent_text, operation, context_subject) |
@@ -892,7 +902,9 @@ def english_to_v3(text: str, agent_id: str = "COMPRESS") -> list[V3Packet]: |
892 | 902 | if packets: |
893 | 903 | has_entities = any(p.subject_tag == TagType.ENTITY for p in packets) |
894 | 904 | has_numbers = any(p.arg2 and "^" in (p.arg2 or "") for p in packets) |
895 | | - has_causality = any(p.arg1 and ("<-" in (p.arg1 or "") or "RE:" in (p.arg1 or "")) for p in packets) |
| 905 | + has_causality = any( |
| 906 | + p.arg1 and ("<-" in (p.arg1 or "") or "RE:" in (p.arg1 or "")) for p in packets |
| 907 | + ) |
896 | 908 | has_confidence = True |
897 | 909 | has_temporal = any( |
898 | 910 | p.temporal != "NOW" or (p.arg2 and "^date:" in p.arg2) |
|
0 commit comments