|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import json |
| 3 | +from collections import Counter |
| 4 | +from datetime import datetime, timedelta, timezone |
| 5 | +from pathlib import Path |
| 6 | + |
# Repository root — assumes this script lives one directory below it
# (e.g. scripts/analyze.py); TODO confirm against the repo layout.
ROOT = Path(__file__).resolve().parents[1]
DATA = ROOT / "data"        # input: snapshot-*.json files produced by the collector
REPORTS = ROOT / "reports"  # output directory for the generated reports
# NOTE: directory creation happens at import time as a side effect.
REPORTS.mkdir(parents=True, exist_ok=True)

OUT_JSON = REPORTS / "fiverr-playbook-reconstruction.json"
OUT_MD = REPORTS / "fiverr-playbook-reconstruction.md"

# Only tweets created within this many days of "now" are analyzed.
LOOKBACK_DAYS = 21
# Replies of at most this many characters are flagged as "short".
SHORT_LEN = 90
# Case-insensitive substrings that mark a generic/templated reply opener.
OPENERS = ["Totally agree", "Agree", "Appreciate", "Nice take", "We value", "Right then"]

| 19 | + |
def parse_dt(ts: str):
    """Parse an ISO-8601 timestamp string into a datetime.

    A trailing ``Z`` (Zulu/UTC) suffix is rewritten to ``+00:00`` first,
    since ``datetime.fromisoformat`` on older Pythons rejects the ``Z`` form.
    """
    normalized = ts.replace("Z", "+00:00")
    return datetime.fromisoformat(normalized)
| 22 | + |
| 23 | + |
def is_reply(tweet: dict) -> bool:
    """Return True if *tweet* references another tweet with type 'replied_to'.

    ``referenced_tweets`` may be absent, None, or contain non-dict entries;
    all of those are treated as "not a reply".
    """
    references = tweet.get("referenced_tweets") or []
    for ref in references:
        if isinstance(ref, dict) and ref.get("type") == "replied_to":
            return True
    return False
| 26 | + |
| 27 | + |
def normalize_snapshot(path: Path):
    """Flatten one snapshot JSON file into a list of per-tweet feature dicts.

    Each row carries the handle, identifiers, timestamps, reply/length/opener
    flags, and the raw engagement counts needed by the downstream report.
    Tweets without a ``created_at`` field are skipped.
    """
    payload = json.loads(path.read_text())
    captured_at = payload.get("captured_at")
    rows = []
    for handle, account in payload.get("accounts", {}).items():
        for tweet in account.get("timeline", []):
            created = tweet.get("created_at")
            if not created:
                continue
            text = tweet.get("text", "")
            lowered = text.lower()
            # First configured opener appearing anywhere in the text, if any.
            opener = None
            for candidate in OPENERS:
                if candidate.lower() in lowered:
                    opener = candidate
                    break
            metrics = tweet.get("public_metrics", {})
            rows.append(
                {
                    "handle": handle,
                    "tweet_id": tweet.get("id"),
                    "created_at": created,
                    "captured_at": captured_at,
                    "is_reply": is_reply(tweet),
                    "is_short": len(text) <= SHORT_LEN,
                    "starts_with_at": text.startswith("@"),
                    "opener": opener,
                    "impressions": metrics.get("impression_count", 0),
                    "likes": metrics.get("like_count", 0),
                    "text": text,
                }
            )
    return rows
| 55 | + |
| 56 | + |
def main() -> None:
    """Merge all snapshots, compute per-account reply-style metrics, and
    write a JSON report plus a Markdown summary under REPORTS.

    Raises SystemExit when no snapshot files exist in DATA.
    """
    snapshots = sorted(DATA.glob("snapshot-*.json"))
    if not snapshots:
        raise SystemExit("No snapshots found. Run collect_x_data.py over multiple intervals first.")

    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(days=LOOKBACK_DAYS)

    # Deduplicate across overlapping snapshots: later snapshots win because
    # they overwrite earlier entries for the same (handle, tweet_id) key.
    merged = {}
    for snap in snapshots:
        for i in normalize_snapshot(snap):
            try:
                created = parse_dt(i["created_at"])
            except Exception:
                # Unparseable timestamps are silently dropped (best-effort).
                continue
            if created < cutoff:
                continue
            key = (i["handle"], i["tweet_id"])
            merged[key] = i

    records = list(merged.values())

    # Group the merged records by account handle.
    by_account = {}
    for r in records:
        by_account.setdefault(r["handle"], []).append(r)

    report = {
        "generated_at": now.isoformat(),
        "lookback_days": LOOKBACK_DAYS,
        "snapshot_count": len(snapshots),
        "accounts": {},
    }

    for handle, arr in by_account.items():
        replies = [x for x in arr if x["is_reply"]]
        short_replies = [x for x in replies if x["is_short"]]
        generic = [x for x in replies if x["opener"]]

        # proxy “worked” vs “did not” (can be improved later with follower-normalized rates)
        worked = [x for x in replies if (x["likes"] > 0 or x["impressions"] >= 10)]
        failed = [x for x in replies if (x["likes"] == 0 and x["impressions"] < 10)]

        # Ratios are None (not 0) when the denominator is empty, so the
        # report distinguishes "no data" from "measured zero".
        report["accounts"][handle] = {
            "total_posts": len(arr),
            "reply_count": len(replies),
            "reply_ratio": round(len(replies) / len(arr), 3) if arr else None,
            "short_reply_ratio": round(len(short_replies) / len(replies), 3) if replies else None,
            "generic_opener_ratio": round(len(generic) / len(replies), 3) if replies else None,
            "top_openers": Counter([x["opener"] for x in generic]).most_common(6),
            "worked_count": len(worked),
            "failed_count": len(failed),
            # NOTE(review): examples are the first five in merge order, not
            # the top performers — confirm whether sorting is wanted.
            "worked_examples": [{"id":x["tweet_id"],"text":x["text"][:140],"likes":x["likes"],"impressions":x["impressions"]} for x in worked[:5]],
            "failed_examples": [{"id":x["tweet_id"],"text":x["text"][:140],"likes":x["likes"],"impressions":x["impressions"]} for x in failed[:5]],
        }

    OUT_JSON.write_text(json.dumps(report, indent=2))

    # Render the same per-account stats as a human-readable Markdown summary.
    lines = [
        "# Fiverr-Style Playbook Reconstruction",
        f"Generated: {report['generated_at']}",
        f"Snapshots analyzed: {report['snapshot_count']}",
        "",
    ]
    for h, s in report["accounts"].items():
        lines += [
            f"## @{h}",
            f"- total posts: {s['total_posts']}",
            f"- reply ratio: {s['reply_ratio']}",
            f"- short-reply ratio: {s['short_reply_ratio']}",
            f"- generic opener ratio: {s['generic_opener_ratio']}",
            f"- worked/failed (proxy): {s['worked_count']}/{s['failed_count']}",
            f"- top openers: {s['top_openers']}",
            "",
        ]

    OUT_MD.write_text("\n".join(lines))
    # Print the output paths so callers/scripts can pick them up.
    print(str(OUT_JSON))
    print(str(OUT_MD))
| 135 | + |

# Script entry point: build the reconstruction report from collected snapshots.
if __name__ == "__main__":
    main()