Skip to content

Commit 08f3bd2

Browse files
committed
cmo: enforce no hashtags/links and dedupe with context-specific reply gating
1 parent e6a7c73 commit 08f3bd2

File tree

2 files changed

+182
-37
lines changed

2 files changed

+182
-37
lines changed

ops/cmo-automation/config/operating_policy.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@
7070
"no_exclamation_marks": true,
7171
"no_emojis": true,
7272
"no_em_dash_or_en_dash": true,
73+
"no_hashtags": true,
74+
"no_links_in_replies": true,
75+
"unique_contextual_replies": true,
7376
"tone": "relevant_and_interesting"
7477
}
7578
},

ops/cmo-automation/scripts/hydrate_approved_queue.py

Lines changed: 179 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@
3232
"]+",
3333
flags=re.UNICODE,
3434
)
35+
# Bare http(s) URLs and "www."-prefixed links, matched up to the next whitespace.
URL_RE = re.compile(r"https?://\S+|www\.\S+", flags=re.IGNORECASE)

# A hashtag is "#" followed by word characters, as long as the "#" is not glued
# to a preceding word character (e.g. "a#b") or to "&" (HTML entities such as
# "&#39;"). Using a lookbehind instead of consuming "(^|\s)" means hashtags that
# follow punctuation, like "(#ai)" or '"#ai"', are caught too, and the
# whitespace in front of the tag is left for the caller to collapse.
HASHTAG_RE = re.compile(r"(?<![\w&])#\w+")

# Promotional / engagement-bait phrases; a source tweet matching any of these
# is treated as low signal by the callers in this file.
LOW_SIGNAL_RE = re.compile(
    r"\b(giveaway|airdrop|retweet\s+to\s+win|win\s+free|pump|moon|token\s+price|telegram\s+community)\b",
    flags=re.IGNORECASE,
)
3542

3643

3744
def load_json(path: Path):
@@ -77,51 +84,146 @@ def normalize_text(text: str, limit: int = 100) -> str:
7784
return clean[: limit - 1].rstrip() + "…"
7885

7986

80-
def enforce_style(text: str) -> str:
87+
def strip_noise(text: str) -> str:
    """Return *text* with links, hashtags and @mentions removed.

    ``None`` or empty input yields an empty string. Runs of whitespace left
    behind by the removals are collapsed to single spaces and the result is
    trimmed at both ends.
    """
    cleaned = text or ""
    # Links first, then hashtags, using the shared module-level patterns.
    for pattern in (URL_RE, HASHTAG_RE):
        cleaned = pattern.sub("", cleaned)
    without_mentions = re.sub(r"@\w+", "", cleaned)
    return re.sub(r"\s+", " ", without_mentions).strip()
93+
94+
95+
def enforce_style(text: str, *, remove_links: bool = False, no_hashtags: bool = False) -> str:
    """Apply the copy-style rules to *text* and cap its length.

    Always removes exclamation marks, replaces em/en dashes with a space and
    strips whatever ``EMOJI_RE`` matches. Whitespace is then collapsed and the
    result is cut to at most 278 characters.

    Args:
        text: Draft copy; ``None`` or empty is treated as ``""``.
        remove_links: When true, also strip everything ``URL_RE`` matches.
        no_hashtags: When true, also strip everything ``HASHTAG_RE`` matches.

    Returns:
        The cleaned text, with no leading or trailing whitespace.
    """
    s = text or ""
    s = s.replace("!", "")
    s = s.replace("—", " ").replace("–", " ")
    s = EMOJI_RE.sub("", s)
    if remove_links:
        s = URL_RE.sub("", s)
    if no_hashtags:
        s = HASHTAG_RE.sub("", s)
    s = re.sub(r"\s+", " ", s).strip()
    # NOTE(review): 278 is presumably headroom under the platform's post
    # length limit; confirm before changing.
    # The cut can land right after a space, so trim again after slicing to
    # avoid returning text with trailing whitespace.
    return s[:278].rstrip()
87106

88107

89-
def build_root_text(account: str, role: str) -> str:
108+
def root_ideas(role: str) -> str:
    """Return the canned root-post line for *role*.

    Known roles are ``"founder"``, ``"brand"`` and ``"product-agent"``; any
    other role gets the ``"brand"`` line.
    """
    founder_line = (
        "We are building accountable AI operations where identity, bounds, "
        "and receipts are structural, not optional."
    )
    brand_line = (
        "Structural trust means governance lives in infrastructure and every "
        "meaningful action is verifiable."
    )
    product_agent_line = (
        "Agent systems get reliable when memory, orchestration, and "
        "verification are designed as one operating model."
    )
    by_role = {
        "founder": founder_line,
        "brand": brand_line,
        "product-agent": product_agent_line,
    }
    if role in by_role:
        return by_role[role]
    return brand_line
115+
116+
117+
def detect_topic(source_text: str) -> tuple[str, str]:
    """Classify *source_text* into a coarse topic by keyword.

    Topics are tried in the order listed in ``topic_map`` and, within a topic,
    keywords are tried left to right; the first hit wins. Matching is
    case-insensitive and a keyword only counts when it starts a word, so
    inflected forms still match ("evals", "shipping", "verifying") while
    accidental substrings do not ("latest" is not "test", "relationship" is
    not "ship", "waterproof" is not "proof").

    Args:
        source_text: Text to classify; ``None`` or empty is treated as ``""``.

    Returns:
        ``(topic, keyword)`` for the first keyword found, or
        ``("general", "")`` when nothing matches.
    """
    lowered = (source_text or "").lower()
    topic_map = [
        ("evals", ["eval", "benchmark", "test", "score"]),
        ("memory", ["memory", "context", "recall", "state"]),
        ("orchestration", ["orchestration", "workflow", "pipeline", "automation"]),
        ("governance", ["governance", "policy", "compliance", "guardrail"]),
        ("verification", ["verify", "proof", "receipt", "audit", "on-chain"]),
        ("agent", ["agent", "autonomous", "multi-agent"]),
        ("distribution", ["distribution", "growth", "retention", "onboarding"]),
        ("shipping", ["ship", "release", "roadmap", "milestone"]),
    ]
    for topic, kws in topic_map:
        for kw in kws:
            # "(?<![^\W_])" means "not preceded by a letter or digit", i.e. the
            # keyword must begin a word. Hyphens, underscores and punctuation
            # in front of it are fine ("multi-agent", "unit_test").
            if re.search(r"(?<![^\W_])" + re.escape(kw), lowered):
                return topic, kw
    return "general", ""
134+
135+
136+
def build_reply_text(role: str, target_user: str, source_text: str, idx_seed: int) -> str:
    """Pick a canned reply for *target_user* based on role and detected topic.

    The topic and matched keyword come from ``detect_topic(source_text)``.
    Each role has a set of topic buckets; a role that is not listed uses the
    ``"brand"`` buckets, and a topic missing from the role's buckets uses that
    role's ``"general"`` bucket. ``idx_seed`` selects the variant inside the
    bucket (modulo the bucket size).

    Returns:
        The reply text, always prefixed with ``@<target_user> ``.
    """
    topic, matched_kw = detect_topic(source_text)
    k = matched_kw or "this"

    # Reply bodies without the "@user " prefix. "{k}" is filled with the
    # matched keyword (or "this"); bodies without a placeholder pass through
    # str.format unchanged.
    bodies = {
        "founder": {
            "evals": [
                "Strong point on evals. In practice, {k} only matters if it changes routing decisions and failure handling.",
                "Agree on eval direction. We treat {k} as an operating control, not a reporting artifact.",
            ],
            "verification": [
                "This maps to our view. Verification has to be built into execution, not added after the fact.",
                "Yes. Without verifiable receipts, accountability collapses into trust claims.",
            ],
            "distribution": [
                "Distribution is the constraint most teams underprice. Measurable retention is what validates the channel.",
                "Good framing. Distribution quality shows up in repeatable retention, not reach spikes.",
            ],
            "general": [
                "Good observation. The useful test is whether it changes operator control, reliability, or verification quality.",
                "Useful angle. I care most about what can be measured and repeated in production.",
            ],
        },
        "product-agent": {
            "evals": [
                "Useful thread. How are you feeding eval outcomes back into orchestration policy after deployment?",
                "Curious about your eval loop design. Do failed cases automatically update routing or guardrails?",
            ],
            "memory": [
                "Good point on memory. Are you separating short-term context from durable decisions in your pipeline?",
                "Memory quality usually decides reliability. How are you handling stale context detection?",
            ],
            "orchestration": [
                "Strong orchestration point. Are you optimizing for throughput, reliability, or reversibility first?",
                "Practical question: what part of the orchestration stack is your current bottleneck?",
            ],
            "general": [
                "Useful angle. What has this changed in your production workflow so far?",
                "Thanks for sharing. What metric improved most after this change?",
            ],
        },
        "brand": {
            "governance": [
                "This is aligned with how we think about governance. Constraints need to be enforceable in system behavior.",
                "Agreed. Governance only works when the system can prove what was allowed and what was blocked.",
            ],
            "verification": [
                "Exactly. Verification quality determines whether trust is operational or just narrative.",
                "Same view here. Verifiable proof creates accountability that survives handoffs and scale.",
            ],
            "orchestration": [
                "Strong point. Reliable orchestration is usually the difference between demos and durable systems.",
                "We see this too. Orchestration quality compounds faster than model-level tuning.",
            ],
            "general": [
                "Solid perspective. The key is whether it improves measurable outcomes in production.",
                "Good signal. What makes this useful is the path from idea to repeatable operational impact.",
            ],
        },
    }

    buckets = bodies[role] if role in bodies else bodies["brand"]
    candidates = buckets.get(topic)
    if not candidates:
        candidates = buckets["general"]
    chosen = candidates[idx_seed % len(candidates)]
    return f"@{target_user} " + chosen.format(k=k)
118200

119201

120202
def contains_denylist(text: str, keywords: list[str]) -> bool:
    """Return True when any of *keywords* occurs in *text*, ignoring case.

    ``None`` or empty *text* is treated as ``""``. Matching is plain substring
    containment on the lowercased text and keyword.
    """
    haystack = (text or "").lower()
    for keyword in keywords:
        if keyword.lower() in haystack:
            return True
    return False
123205

124206

207+
def source_anchor(text: str) -> str:
    """Return up to four distinctive words from *text*, space-joined.

    The text is lowercased and split into ASCII alphanumeric runs. Words of
    three characters or fewer and a small set of filler words are dropped;
    the first four survivors are kept in their original order. Returns ``""``
    when nothing survives (including for ``None`` or empty input).
    """
    filler = {
        "this", "that", "with", "from", "your", "about", "into", "once", "when", "what",
        "have", "been", "they", "them", "then", "than", "will", "just", "more", "less",
    }
    picked = []
    for token in re.findall(r"[a-zA-Z0-9]+", (text or "").lower()):
        if len(token) <= 3 or token in filler:
            continue
        picked.append(token)
        if len(picked) == 4:
            break
    return " ".join(picked)
217+
218+
219+
def canonical_reply(text: str) -> str:
    """Normalize a reply so near-identical replies compare equal.

    Lowercases the text, removes @mentions, replaces every character that is
    not ``a-z``, ``0-9`` or whitespace with a space, then collapses whitespace
    and trims. ``None`` or empty input yields ``""``.
    """
    lowered = (text or "").lower().strip()
    # Applied in order: drop mentions, blank out punctuation, squeeze spaces.
    rewrites = (
        (r"@\w+", ""),
        (r"[^a-z0-9\s]", " "),
        (r"\s+", " "),
    )
    for pattern, replacement in rewrites:
        lowered = re.sub(pattern, replacement, lowered)
    return lowered.strip()
225+
226+
125227
def default_resolver(target_user: str | None, account: str) -> dict | None:
126228
account_queries = {
127229
"TheCesarCross": "AI founders OR agentic workflow OR product distribution",
@@ -134,29 +236,36 @@ def default_resolver(target_user: str | None, account: str) -> dict | None:
134236
else:
135237
query = account_queries.get(account, "AI products OR automation")
136238

137-
data = run_json(["x-cli", "-j", "tweet", "search", query, "--max", "10"])
239+
data = run_json(["x-cli", "-j", "tweet", "search", query, "--max", "15"])
138240
if not isinstance(data, list) or not data:
139241
return None
140242

141243
for t in data:
142244
if not isinstance(t, dict):
143245
continue
144246
tweet_id = t.get("id")
145-
text = t.get("text")
146-
author = ((t.get("author") or {}).get("username") if isinstance(t.get("author"), dict) else None)
147-
if tweet_id and text:
148-
return {"id": str(tweet_id), "text": text, "author": author}
247+
text = t.get("text") or ""
248+
author = (t.get("author", {}).get("username") if isinstance(t.get("author"), dict) else None)
249+
if not tweet_id or not text:
250+
continue
251+
if LOW_SIGNAL_RE.search(text):
252+
continue
253+
return {"id": str(tweet_id), "text": text, "author": author}
149254
return None
150255

151256

152-
def hydrate_single_action(action: dict, policy: dict, resolver: Resolver) -> dict:
257+
def hydrate_single_action(action: dict, policy: dict, resolver: Resolver, seen: dict) -> dict:
153258
out = dict(action)
154259
account = out.get("account", "")
155260
role = policy.get("account_strategy", {}).get(account, {}).get("role", "brand")
156261
founder_keywords = policy.get("founder_denylist", {}).get("keywords", [])
157262

263+
style = policy.get("copy_style", {}).get("for_all_accounts", {})
264+
no_hashtags = bool(style.get("no_hashtags", True))
265+
no_links_in_replies = bool(style.get("no_links_in_replies", True))
266+
158267
if out.get("action") == "root_post":
159-
post_text = build_root_text(account, role)
268+
post_text = enforce_style(root_ideas(role), no_hashtags=no_hashtags)
160269
if role == "founder" and contains_denylist(post_text, founder_keywords):
161270
out["hydration_status"] = "blocked"
162271
out["hydration_reason"] = "founder_denylist_hit"
@@ -174,15 +283,46 @@ def hydrate_single_action(action: dict, policy: dict, resolver: Resolver) -> dic
174283
out["hydration_reason"] = "no_candidate_tweet"
175284
return out
176285

286+
tweet_id = str(candidate["id"])
287+
if tweet_id in seen["tweet_ids"]:
288+
out["hydration_status"] = "blocked"
289+
out["hydration_reason"] = "duplicate_target_tweet"
290+
return out
291+
292+
source_text = strip_noise(candidate.get("text", ""))
293+
if not source_text or LOW_SIGNAL_RE.search(source_text):
294+
out["hydration_status"] = "blocked"
295+
out["hydration_reason"] = "low_signal_source"
296+
return out
297+
298+
topic, _ = detect_topic(source_text)
299+
if topic == "general":
300+
out["hydration_status"] = "blocked"
301+
out["hydration_reason"] = "insufficient_context_specificity"
302+
return out
303+
177304
target_user = out.get("target_user") or candidate.get("author") or "builder"
178-
reply_text = build_reply_text(account, role, target_user, candidate.get("text", ""))
305+
idx_seed = len(seen["reply_norms"]) + len(target_user) + len(tweet_id)
306+
reply_raw = build_reply_text(role, target_user, source_text, idx_seed)
307+
anchor = source_anchor(source_text)
308+
if anchor and len(anchor.split()) >= 2:
309+
reply_raw = f"{reply_raw} Specific to {anchor}."
310+
reply_text = enforce_style(reply_raw, remove_links=no_links_in_replies, no_hashtags=no_hashtags)
179311

180312
if role == "founder" and contains_denylist(reply_text, founder_keywords):
181313
out["hydration_status"] = "blocked"
182314
out["hydration_reason"] = "founder_denylist_hit"
183315
return out
184316

185-
tweet_id = str(candidate["id"])
317+
norm = canonical_reply(reply_text)
318+
if norm in seen["reply_norms"]:
319+
out["hydration_status"] = "blocked"
320+
out["hydration_reason"] = "duplicate_reply_text"
321+
return out
322+
323+
seen["tweet_ids"].add(tweet_id)
324+
seen["reply_norms"].add(norm)
325+
186326
out["target_user"] = target_user
187327
out["target_tweet_id"] = tweet_id
188328
out["reply_text"] = reply_text
@@ -204,12 +344,14 @@ def hydrate_review(review: dict, policy: dict, resolver: Resolver) -> dict:
204344
ready = 0
205345
blocked = 0
206346

347+
seen = {"tweet_ids": set(), "reply_norms": set()}
348+
207349
for account, payload in review.get("accounts", {}).items():
208350
approved = payload.get("approved_actions", [])
209351
hydrated_approved = []
210352
for action in approved:
211353
total += 1
212-
h = hydrate_single_action(action, policy, resolver)
354+
h = hydrate_single_action(action, policy, resolver, seen)
213355
hydrated_approved.append(h)
214356
if h.get("hydration_status") == "hydrated":
215357
ready += 1

0 commit comments

Comments
 (0)