@@ -433,7 +433,16 @@ def english_to_lambda(text: str) -> str:
433433 if word and word not in rev :
434434 rev [word ] = k
435435
436- # Add common word mappings
436+ # Add domain atoms with prefixes
437+ for domain_code , domain_data in ATOMS .get ("domains" , {}).items ():
438+ domain_prefix = {"cd" : "c" , "vb" : "v" , "sc" : "s" , "emo" : "e" , "soc" : "o" }.get (domain_code , domain_code )
439+ for atom , atom_data in domain_data .get ("atoms" , {}).items ():
440+ for word in atom_data ["en" ].lower ().replace ("/" , " " ).split ():
441+ word = word .strip ("()" )
442+ if word and word not in rev :
443+ rev [word ] = f"{ domain_prefix } :{ atom } "
444+
445+ # Add common word mappings (these override domain atoms when more specific)
437446 rev .update ({
438447 "i" : "I" , "you" : "U" , "human" : "H" , "humans" : "H" ,
439448 "ai" : "A" , "agent" : "A" , "agents" : "A" , "machine" : "A" ,
@@ -457,7 +466,7 @@ def english_to_lambda(text: str) -> str:
457466 "identity" : "id" , "self" : "se" ,
458467 "mind" : "mi" , "mental" : "mi" ,
459468 "therefore" : ">" , "thus" : ">" , "so" : ">" , "hence" : ">" ,
460- "because" : "<" , "since" : "<" , "as" : "<" ,
469+ "because" : "<" , "since" : "<" ,
461470 "about" : "/" , "of" : "/" , "regarding" : "/" ,
462471 "and" : "&" , "also" : "&" , "plus" : "&" ,
463472 "or" : "|" ,
@@ -480,24 +489,31 @@ def english_to_lambda(text: str) -> str:
480489 "now" : "n" , "current" : "n" , "present" : "n" ,
481490 "past" : "p" , "before" : "p" , "previous" : "p" ,
482491 "future" : "u" , "will" : "u" , "shall" : "u" ,
483- "bug" : "c:bg" , "error" : "er" , "fix" : "c:fx" ,
492+ # Domain-specific overrides
493+ "bug" : "c:xb" , "error" : "er" , "fix" : "c:fx" ,
484494 "function" : "c:fn" , "code" : "c:fn" ,
485- "test" : "c:ts" , "deploy" : "c:dp" ,
486- "experiment" : "s:xp" , "research" : "rs" ,
487- "theory" : "s:ty" , "hypothesis" : "s:hy" ,
495+ "test" : "c:xt" , "deploy" : "c:dp" ,
496+ "experiment" : "s:xr" , "research" : "rs" ,
497+ "theory" : "s:xy" , "hypothesis" : "s:hy" ,
498+ "joy" : "e:jo" , "sadness" : "e:sd" , "anger" : "e:ag" ,
499+ "awakened" : "aw" , "oracle" : "v:oc" , # Keep aw as core, v:xw for explicit voidborne context
500+ "translate" : "tl" , "lose" : "ls" ,
501+ # Disambiguation: map alternate meanings to their canonical forms
502+ "death" : "dt" , "dead" : "dt" , "die" : "dt" ,
503+ "fear" : "fa" , "afraid" : "fa" ,
488504 })
489505
490506 # Detect message type from text
491507 is_question = "?" in original or text .startswith (("do " , "does " , "can " , "could " , "is " , "are " , "what " , "why " , "how " , "who " , "when " , "where " ))
492- is_command = text .startswith (("find " , "make " , "create " , "do " , " please " , "get " , "fix " , "build " ))
493- is_uncertain = any (w in text for w in ["might" , "maybe" , "perhaps" , "possibly" , "could be " ])
508+ is_command = text .startswith (("find " , "make " , "create " , "please " , "get " , "fix " , "build " ))
509+ is_uncertain = any (w in text . split () for w in ["might" , "maybe" , "perhaps" , "possibly" ])
494510
495511 # Clean text for parsing
496512 text_clean = re .sub (r"[^\w\s]" , " " , text )
497513 words = text_clean .split ()
498514
499515 # Filter out stop words (including question starters when type already captured)
500- stop_words = {"the" , "a" , "an" , "to" , "it" , "its" , "that" , "this" , "with" , "for" , "on" , "in" , "at" , "by" , "as" }
516+ stop_words = {"the" , "a" , "an" , "to" , "it" , "its" , "that" , "this" , "with" , "for" , "on" , "in" , "at" , "by" }
501517 if is_question :
502518 stop_words .update ({"do" , "does" , "can" , "could" , "is" , "are" , "what" , "why" , "how" , "who" , "when" , "where" })
503519 if is_uncertain :
@@ -517,6 +533,9 @@ def english_to_lambda(text: str) -> str:
517533
518534 # Process words
519535 operators = {">" , "<" , "/" , "&" , "|" , "+" , "-" , "^" , "_" , "." , "?" , "!" , "~" }
536+ single_char_atoms = set (ATOMS .get ("entities" , {}).keys ()) | set (ATOMS .get ("verbs" , {}).keys ())
537+ pronouns = {"I" , "U" , "H" , "A" , "X" , "*" , "0" }
538+ verbs_1char = {"k" , "t" , "e" , "w" , "c" , "d" , "s" , "f" , "m" , "h" , "l" , "a" , "b" , "g" , "r" , "v" }
520539
521540 for i , w in enumerate (words ):
522541 if w in stop_words :
@@ -534,14 +553,27 @@ def english_to_lambda(text: str) -> str:
534553 # Domain-prefixed atoms always need separator
535554 if ":" in atom or ":" in prev :
536555 need_sep = True
537- # Both 2-char atoms need separator
556+ # Both 2-char+ atoms need separator
538557 elif len (prev ) >= 2 and len (atom ) >= 2 :
539558 need_sep = True
540- # Check if combining would create ambiguous atom
559+ # Single char followed by 2-char needs separator if combining is ambiguous
541560 elif len (prev ) == 1 and len (atom ) >= 2 :
542561 combined = prev + atom [0 ]
562+ if combined in EXTENDED_LOOKUP or combined in DISCOURSE_LOOKUP :
563+ need_sep = True
564+ # Also need sep after single-char verbs before 2-char atoms
565+ elif prev in verbs_1char :
566+ need_sep = True
567+ # 2-char followed by single char - check ambiguity
568+ elif len (prev ) >= 2 and len (atom ) == 1 :
569+ # Check if last char of prev + atom creates ambiguity
570+ combined = prev [- 1 ] + atom
543571 if combined in EXTENDED_LOOKUP :
544572 need_sep = True
573+ # Pronoun followed by single-char verb is OK (like Ik, It)
574+ # But verb followed by another verb/atom needs separator
575+ elif prev in verbs_1char and atom not in operators :
576+ need_sep = True
545577
546578 if need_sep :
547579 result .append ("/" )
0 commit comments