3232 "]+" ,
3333 flags = re .UNICODE ,
3434)
# Matches http(s) URLs and bare "www." links, each running to the next whitespace.
URL_RE = re.compile(r"https?://\S+|www\.\S+", flags=re.IGNORECASE)
# Matches a hashtag that sits at the start of the string or right after
# whitespace. The preceding whitespace character is part of the match, so
# substituting "" removes it as well.
HASHTAG_RE = re.compile(r"(^|\s)#\w+")

# Whole-word, case-insensitive phrases used to reject promotional or
# speculative source text.
LOW_SIGNAL_RE = re.compile(
    r"\b(giveaway|airdrop|retweet\s+to\s+win|win\s+free|pump|moon|token\s+price|telegram\s+community)\b",
    flags=re.IGNORECASE,
)
3542
3643
3744def load_json (path : Path ):
@@ -77,51 +84,146 @@ def normalize_text(text: str, limit: int = 100) -> str:
7784 return clean [: limit - 1 ].rstrip () + "…"
7885
7986
80- def enforce_style (text : str ) -> str :
def strip_noise(text: str) -> str:
    """Return *text* with URLs, hashtags and @mentions removed.

    ``None`` or empty input yields ``""``. Runs of whitespace left behind by
    the removals are collapsed to single spaces and the result is trimmed.
    """
    s = URL_RE.sub("", text or "")
    s = HASHTAG_RE.sub("", s)
    # Only treat "@name" as a mention when it is not glued to a preceding word
    # character; otherwise "bob@example.com" would be mangled into "bob.com".
    s = re.sub(r"(?<!\w)@\w+", "", s)
    return re.sub(r"\s+", " ", s).strip()
93+
94+
def enforce_style(text: str, *, remove_links: bool = False, no_hashtags: bool = False) -> str:
    """Apply the house copy style to *text* and cap its length.

    Exclamation marks and emoji are removed, em/en dashes become spaces, and
    whitespace is collapsed. URLs and hashtags are stripped only when the
    corresponding keyword flag is set.

    Args:
        text: Raw copy; ``None`` or empty is treated as ``""``.
        remove_links: When true, delete anything matched by ``URL_RE``.
        no_hashtags: When true, delete anything matched by ``HASHTAG_RE``.

    Returns:
        The cleaned text, at most 278 characters long.
    """
    s = text or ""
    s = s.replace("!", "")
    s = s.replace("—", " ").replace("–", " ")
    s = EMOJI_RE.sub("", s)
    if remove_links:
        s = URL_RE.sub("", s)
    if no_hashtags:
        s = HASHTAG_RE.sub("", s)
    s = re.sub(r"\s+", " ", s).strip()
    # The cut can land right after a space; trim so the result never ends in
    # whitespace. NOTE(review): 278 is presumably a post-length budget — confirm.
    return s[:278].rstrip()
87106
88107
89- def build_root_text ( account : str , role : str ) -> str :
def root_ideas(role: str) -> str:
    """Return the root-post talking point for *role*.

    Known roles are ``"founder"``, ``"brand"`` and ``"product-agent"``; any
    other value falls back to the ``"brand"`` text.
    """
    ideas = {
        "founder": "We are building accountable AI operations where identity, bounds, and receipts are structural, not optional.",
        "brand": "Structural trust means governance lives in infrastructure and every meaningful action is verifiable.",
        "product-agent": "Agent systems get reliable when memory, orchestration, and verification are designed as one operating model.",
    }
    return ideas.get(role, ideas["brand"])
115+
116+
# Ordered (topic, keywords) pairs; earlier topics win when several match.
_TOPIC_KEYWORDS = (
    ("evals", ("eval", "benchmark", "test", "score")),
    ("memory", ("memory", "context", "recall", "state")),
    ("orchestration", ("orchestration", "workflow", "pipeline", "automation")),
    ("governance", ("governance", "policy", "compliance", "guardrail")),
    ("verification", ("verify", "proof", "receipt", "audit", "on-chain")),
    ("agent", ("agent", "autonomous", "multi-agent")),
    ("distribution", ("distribution", "growth", "retention", "onboarding")),
    ("shipping", ("ship", "release", "roadmap", "milestone")),
)

# Each keyword is anchored at a word start so it still matches inflections
# ("evals", "shipping") but not unrelated words that merely contain it
# ("retrieval", "leadership", "latest").
_TOPIC_PATTERNS = tuple(
    (topic, kw, re.compile(r"\b" + re.escape(kw)))
    for topic, kws in _TOPIC_KEYWORDS
    for kw in kws
)


def detect_topic(source_text: str) -> tuple[str, str]:
    """Classify *source_text* into a coarse topic.

    Matching is case-insensitive and checks keywords in the order listed in
    ``_TOPIC_KEYWORDS``; the first keyword found at the start of a word wins.

    Returns:
        ``(topic, keyword)`` for the first hit, or ``("general", "")`` when
        nothing matches (including ``None`` or empty input).
    """
    s = (source_text or "").lower()
    for topic, kw, pattern in _TOPIC_PATTERNS:
        if pattern.search(s):
            return topic, kw
    return "general", ""
134+
135+
def build_reply_text(role: str, target_user: str, source_text: str, idx_seed: int) -> str:
    """Pick a templated reply addressed to *target_user*.

    The topic of *source_text* (via ``detect_topic``) selects a pair of
    templates inside the bucket for *role*; ``idx_seed`` chooses between them.

    Args:
        role: ``"founder"``, ``"product-agent"`` or ``"brand"``; anything else
            uses the ``"brand"`` bucket.
        target_user: Handle to address; a leading "@" is tolerated.
        source_text: Text of the post being replied to.
        idx_seed: Any integer; taken modulo the number of templates.

    Returns:
        The chosen reply, starting with ``@<handle>``. No style enforcement
        is applied here.
    """
    topic, kw = detect_topic(source_text)
    k = kw or "this"
    # Avoid rendering "@@name" when the caller already included the "@".
    target_user = target_user.lstrip("@")

    variants = {
        "founder": {
            "evals": [
                f"@{target_user} Strong point on evals. In practice, {k} only matters if it changes routing decisions and failure handling.",
                f"@{target_user} Agree on eval direction. We treat {k} as an operating control, not a reporting artifact.",
            ],
            "verification": [
                f"@{target_user} This maps to our view. Verification has to be built into execution, not added after the fact.",
                f"@{target_user} Yes. Without verifiable receipts, accountability collapses into trust claims.",
            ],
            "distribution": [
                f"@{target_user} Distribution is the constraint most teams underprice. Measurable retention is what validates the channel.",
                f"@{target_user} Good framing. Distribution quality shows up in repeatable retention, not reach spikes.",
            ],
            "general": [
                f"@{target_user} Good observation. The useful test is whether it changes operator control, reliability, or verification quality.",
                f"@{target_user} Useful angle. I care most about what can be measured and repeated in production.",
            ],
        },
        "product-agent": {
            "evals": [
                f"@{target_user} Useful thread. How are you feeding eval outcomes back into orchestration policy after deployment?",
                f"@{target_user} Curious about your eval loop design. Do failed cases automatically update routing or guardrails?",
            ],
            "memory": [
                f"@{target_user} Good point on memory. Are you separating short-term context from durable decisions in your pipeline?",
                f"@{target_user} Memory quality usually decides reliability. How are you handling stale context detection?",
            ],
            "orchestration": [
                f"@{target_user} Strong orchestration point. Are you optimizing for throughput, reliability, or reversibility first?",
                f"@{target_user} Practical question: what part of the orchestration stack is your current bottleneck?",
            ],
            "general": [
                f"@{target_user} Useful angle. What has this changed in your production workflow so far?",
                f"@{target_user} Thanks for sharing. What metric improved most after this change?",
            ],
        },
        "brand": {
            "governance": [
                f"@{target_user} This is aligned with how we think about governance. Constraints need to be enforceable in system behavior.",
                f"@{target_user} Agreed. Governance only works when the system can prove what was allowed and what was blocked.",
            ],
            "verification": [
                f"@{target_user} Exactly. Verification quality determines whether trust is operational or just narrative.",
                f"@{target_user} Same view here. Verifiable proof creates accountability that survives handoffs and scale.",
            ],
            "orchestration": [
                f"@{target_user} Strong point. Reliable orchestration is usually the difference between demos and durable systems.",
                f"@{target_user} We see this too. Orchestration quality compounds faster than model-level tuning.",
            ],
            "general": [
                f"@{target_user} Solid perspective. The key is whether it improves measurable outcomes in production.",
                f"@{target_user} Good signal. What makes this useful is the path from idea to repeatable operational impact.",
            ],
        },
    }

    # Unknown roles use the brand voice; topics a role has no templates for
    # fall back to that role's "general" pair.
    role_bucket = variants.get(role, variants["brand"])
    options = role_bucket.get(topic) or role_bucket["general"]
    return options[idx_seed % len(options)]
118200
119201
def contains_denylist(text: str, keywords: list[str]) -> bool:
    """Return True if any denylist keyword occurs in *text*, ignoring case.

    Matching is plain substring containment. ``None`` text is treated as
    empty. A missing keyword list, non-string entries, and blank keywords are
    ignored: a blank keyword would otherwise match every text, because the
    empty string is a substring of anything.
    """
    t = (text or "").lower()
    return any(
        k.lower() in t
        for k in (keywords or ())
        if isinstance(k, str) and k.strip()
    )
123205
124206
# Common filler words that carry no topical signal in an anchor phrase.
_ANCHOR_STOPWORDS = frozenset({
    "this", "that", "with", "from", "your", "about", "into", "once", "when", "what",
    "have", "been", "they", "them", "then", "than", "will", "just", "more", "less",
})
# Input is lower-cased before matching, so only a-z and digits are needed.
_ANCHOR_WORD_RE = re.compile(r"[a-z0-9]+")


def source_anchor(text: str) -> str:
    """Return up to four salient words from *text*, space-joined.

    Words are lower-cased ASCII alphanumeric runs; those of three characters
    or fewer, or listed in ``_ANCHOR_STOPWORDS``, are skipped. Original order
    is kept. Returns ``""`` when nothing qualifies (including ``None`` or
    empty input).
    """
    words = _ANCHOR_WORD_RE.findall((text or "").lower())
    keep = [w for w in words if len(w) > 3 and w not in _ANCHOR_STOPWORDS]
    return " ".join(keep[:4])
217+
218+
_CANON_MENTION_RE = re.compile(r"@\w+")
_CANON_NON_ALNUM_RE = re.compile(r"[^a-z0-9\s]")
_CANON_SPACE_RE = re.compile(r"\s+")


def canonical_reply(text: str) -> str:
    """Normalise a reply so near-identical replies compare equal.

    The text is lower-cased, @mentions are dropped, every character other
    than ``a-z``, ``0-9`` or whitespace becomes a space, and whitespace is
    collapsed and trimmed. ``None`` or empty input yields ``""``.
    """
    s = (text or "").lower()
    s = _CANON_MENTION_RE.sub("", s)
    s = _CANON_NON_ALNUM_RE.sub(" ", s)
    return _CANON_SPACE_RE.sub(" ", s).strip()
225+
226+
125227def default_resolver (target_user : str | None , account : str ) -> dict | None :
126228 account_queries = {
127229 "TheCesarCross" : "AI founders OR agentic workflow OR product distribution" ,
@@ -134,29 +236,36 @@ def default_resolver(target_user: str | None, account: str) -> dict | None:
134236 else :
135237 query = account_queries .get (account , "AI products OR automation" )
136238
137- data = run_json (["x-cli" , "-j" , "tweet" , "search" , query , "--max" , "10 " ])
239+ data = run_json (["x-cli" , "-j" , "tweet" , "search" , query , "--max" , "15 " ])
138240 if not isinstance (data , list ) or not data :
139241 return None
140242
141243 for t in data :
142244 if not isinstance (t , dict ):
143245 continue
144246 tweet_id = t .get ("id" )
145- text = t .get ("text" )
146- author = ((t .get ("author" ) or {}).get ("username" ) if isinstance (t .get ("author" ), dict ) else None )
147- if tweet_id and text :
148- return {"id" : str (tweet_id ), "text" : text , "author" : author }
247+ text = t .get ("text" ) or ""
248+ author = (t .get ("author" , {}).get ("username" ) if isinstance (t .get ("author" ), dict ) else None )
249+ if not tweet_id or not text :
250+ continue
251+ if LOW_SIGNAL_RE .search (text ):
252+ continue
253+ return {"id" : str (tweet_id ), "text" : text , "author" : author }
149254 return None
150255
151256
152- def hydrate_single_action (action : dict , policy : dict , resolver : Resolver ) -> dict :
257+ def hydrate_single_action (action : dict , policy : dict , resolver : Resolver , seen : dict ) -> dict :
153258 out = dict (action )
154259 account = out .get ("account" , "" )
155260 role = policy .get ("account_strategy" , {}).get (account , {}).get ("role" , "brand" )
156261 founder_keywords = policy .get ("founder_denylist" , {}).get ("keywords" , [])
157262
263+ style = policy .get ("copy_style" , {}).get ("for_all_accounts" , {})
264+ no_hashtags = bool (style .get ("no_hashtags" , True ))
265+ no_links_in_replies = bool (style .get ("no_links_in_replies" , True ))
266+
158267 if out .get ("action" ) == "root_post" :
159- post_text = build_root_text ( account , role )
268+ post_text = enforce_style ( root_ideas ( role ), no_hashtags = no_hashtags )
160269 if role == "founder" and contains_denylist (post_text , founder_keywords ):
161270 out ["hydration_status" ] = "blocked"
162271 out ["hydration_reason" ] = "founder_denylist_hit"
@@ -174,15 +283,46 @@ def hydrate_single_action(action: dict, policy: dict, resolver: Resolver) -> dic
174283 out ["hydration_reason" ] = "no_candidate_tweet"
175284 return out
176285
286+ tweet_id = str (candidate ["id" ])
287+ if tweet_id in seen ["tweet_ids" ]:
288+ out ["hydration_status" ] = "blocked"
289+ out ["hydration_reason" ] = "duplicate_target_tweet"
290+ return out
291+
292+ source_text = strip_noise (candidate .get ("text" , "" ))
293+ if not source_text or LOW_SIGNAL_RE .search (source_text ):
294+ out ["hydration_status" ] = "blocked"
295+ out ["hydration_reason" ] = "low_signal_source"
296+ return out
297+
298+ topic , _ = detect_topic (source_text )
299+ if topic == "general" :
300+ out ["hydration_status" ] = "blocked"
301+ out ["hydration_reason" ] = "insufficient_context_specificity"
302+ return out
303+
177304 target_user = out .get ("target_user" ) or candidate .get ("author" ) or "builder"
178- reply_text = build_reply_text (account , role , target_user , candidate .get ("text" , "" ))
305+ idx_seed = len (seen ["reply_norms" ]) + len (target_user ) + len (tweet_id )
306+ reply_raw = build_reply_text (role , target_user , source_text , idx_seed )
307+ anchor = source_anchor (source_text )
308+ if anchor and len (anchor .split ()) >= 2 :
309+ reply_raw = f"{ reply_raw } Specific to { anchor } ."
310+ reply_text = enforce_style (reply_raw , remove_links = no_links_in_replies , no_hashtags = no_hashtags )
179311
180312 if role == "founder" and contains_denylist (reply_text , founder_keywords ):
181313 out ["hydration_status" ] = "blocked"
182314 out ["hydration_reason" ] = "founder_denylist_hit"
183315 return out
184316
185- tweet_id = str (candidate ["id" ])
317+ norm = canonical_reply (reply_text )
318+ if norm in seen ["reply_norms" ]:
319+ out ["hydration_status" ] = "blocked"
320+ out ["hydration_reason" ] = "duplicate_reply_text"
321+ return out
322+
323+ seen ["tweet_ids" ].add (tweet_id )
324+ seen ["reply_norms" ].add (norm )
325+
186326 out ["target_user" ] = target_user
187327 out ["target_tweet_id" ] = tweet_id
188328 out ["reply_text" ] = reply_text
@@ -204,12 +344,14 @@ def hydrate_review(review: dict, policy: dict, resolver: Resolver) -> dict:
204344 ready = 0
205345 blocked = 0
206346
347+ seen = {"tweet_ids" : set (), "reply_norms" : set ()}
348+
207349 for account , payload in review .get ("accounts" , {}).items ():
208350 approved = payload .get ("approved_actions" , [])
209351 hydrated_approved = []
210352 for action in approved :
211353 total += 1
212- h = hydrate_single_action (action , policy , resolver )
354+ h = hydrate_single_action (action , policy , resolver , seen )
213355 hydrated_approved .append (h )
214356 if h .get ("hydration_status" ) == "hydrated" :
215357 ready += 1
0 commit comments