|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import os |
| 3 | +import re |
| 4 | +import sys |
| 5 | +import argparse |
| 6 | +from collections import defaultdict |
| 7 | +from datetime import datetime |
| 8 | + |
def get_ts_files(root_dir):
    """Collect the paths of all TypeScript sources under ``root_dir``.

    Walks the tree top-down and never descends into build, dependency or
    tooling directories. A file counts as TypeScript when its name ends in
    ``.ts`` or ``.tsx``.

    Args:
        root_dir: Directory at which the recursive walk starts.

    Returns:
        A list of paths, each formed by joining the walked directory path
        with the file name, in the order ``os.walk`` yields them.
    """
    skipped = {'node_modules', 'dist', '.git', '.wrangler', '.vscode', 'drizzle', '.github'}
    suffixes = ('.ts', '.tsx')
    found = []

    for current_dir, subdirs, names in os.walk(root_dir):
        # Slice-assign so os.walk sees the pruned list and skips those subtrees.
        subdirs[:] = [name for name in subdirs if name not in skipped]
        found.extend(
            os.path.join(current_dir, name)
            for name in names
            if name.endswith(suffixes)
        )

    return found
| 22 | + |
# Matches: export const varName = sqliteTable('tableName', ...)
_TABLE_DEF_RE = re.compile(
    r"export\s+const\s+([a-zA-Z0-9_]+)\s*=\s*"
    r"(?:sqliteTable|pgTable|mysqlTable)\(\s*['\"]([^'\"]+)['\"]"
)

# The trailing \b is essential: "_" is a word character, so `env.DB` no longer
# matches inside `env.DB_WEBHOOKS`. A plain substring test would attribute
# every webhooks-only file to env.DB as well. `c.env.DB` is still covered
# because it contains `env.DB`.
_DB_BINDING_RE = re.compile(r"env\.DB\b")
_DB_WEBHOOKS_BINDING_RE = re.compile(r"env\.DB_WEBHOOKS\b")


def _read_sources(paths):
    """Read every file once and return ``{path: text}``, warning on failures."""
    sources = {}
    for path in paths:
        try:
            with open(path, 'r', encoding='utf-8') as f:
                sources[path] = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"Warning: Could not read {path}: {e}")
    return sources


def _extract_tables(sources, root_dir):
    """Return one dict per Drizzle table definition found in ``sources``."""
    tables = []
    for path, text in sources.items():
        rel_path = os.path.relpath(path, root_dir)
        for var_name, table_name in _TABLE_DEF_RE.findall(text):
            tables.append({
                "var_name": var_name,
                "table_name": table_name,
                "file": rel_path
            })
    return tables


def _map_table_usage(sources, tables, root_dir):
    """Return ``(file_interactions, db1_map, db2_map)`` for the scanned sources.

    ``file_interactions`` maps a relative file path to the set of table names
    whose variable appears in that file. ``db1_map`` / ``db2_map`` map a table
    name to the set of files that mention both the table variable and the
    ``env.DB`` / ``env.DB_WEBHOOKS`` binding respectively.
    """
    # Compile each whole-word variable pattern once rather than once per file.
    var_patterns = [
        (t['table_name'], re.compile(r"\b" + re.escape(t['var_name']) + r"\b"))
        for t in tables
    ]

    file_interactions = {}
    db1_map = defaultdict(set)  # For env.DB
    db2_map = defaultdict(set)  # For env.DB_WEBHOOKS

    for path, text in sources.items():
        referenced = {name for name, pattern in var_patterns if pattern.search(text)}
        if not referenced:
            continue

        rel_path = os.path.relpath(path, root_dir)
        file_interactions[rel_path] = referenced

        uses_db1 = _DB_BINDING_RE.search(text) is not None
        uses_db2 = _DB_WEBHOOKS_BINDING_RE.search(text) is not None
        for name in referenced:
            if uses_db1:
                db1_map[name].add(rel_path)
            if uses_db2:
                db2_map[name].add(rel_path)

    return file_interactions, db1_map, db2_map


def _bullet_lines(names, empty_note):
    """Return one Markdown bullet per name, or ``[empty_note]`` if there are none."""
    return [f"- {name}" for name in names] or [empty_note]


def _usage_table_lines(usage_map):
    """Return the Markdown table (header plus rows) for one binding's usage map."""
    lines = ["| Table Name | Short File Paths |", "|---|---|"]
    if usage_map:
        for name in sorted(usage_map):
            paths = ", ".join(f"`{p}`" for p in sorted(usage_map[name]))
            lines.append(f"| **{name}** | {paths} |")
    else:
        lines.append("| *None Detected* | *N/A* |")
    return lines


def _render_report(tables, file_interactions, db1_map, db2_map):
    """Build the full Markdown report text, ending with a newline."""
    db1_sorted = sorted(db1_map)
    db2_sorted = sorted(db2_map)

    md = ["# Drizzle ORM Schema & D1 Analysis Report\n"]
    md.append("## Table Names by Database\n")

    md.append("### env.DB")
    md.extend(_bullet_lines(db1_sorted, "- *No tables definitively mapped to env.DB yet*"))

    md.append("\n### env.DB_WEBHOOKS")
    md.extend(_bullet_lines(db2_sorted, "- *No tables definitively mapped to env.DB_WEBHOOKS yet*"))

    # Catch AI Slop (Orphaned Tables): defined, but never seen next to a binding.
    mapped_tables = set(db1_sorted) | set(db2_sorted)
    unmapped = sorted({t['table_name'] for t in tables} - mapped_tables)
    if unmapped:
        md.append("\n### Unmapped / Orphaned Schema Tables")
        md.append("*(Suspicious AI Slop: Defined in code but no CRUD operations with a known D1 env var detected)*")
        md.extend(f"- {name}" for name in unmapped)

    md.append("\n---\n\n## Code Files Interacting with D1 Tables\n")
    for file_path in sorted(file_interactions):
        tables_used = ", ".join(sorted(file_interactions[file_path]))
        md.append(f"### `{file_path}`")
        md.append(f"- **Tables Imported:** {tables_used}\n")

    md.append("---\n\n## env.DB d1 db")
    md.extend(_usage_table_lines(db1_map))

    md.append("\n## env.DB_WEBHOOKS d1 db")
    md.extend(_usage_table_lines(db2_map))

    return "\n".join(md) + "\n"


def main():
    """Scan the current directory's TypeScript files and write a schema report.

    Finds Drizzle table definitions (``sqliteTable`` / ``pgTable`` /
    ``mysqlTable``), records which files mention each table variable together
    with the ``env.DB`` or ``env.DB_WEBHOOKS`` binding, and writes a Markdown
    report to ``--output``. The default output is a timestamped file under
    ``scripts/reports/hygeine`` in the current working directory.

    Exits with status 1 if the output directory cannot be created or the
    report cannot be written.
    """
    # Generate timestamp in yyyy-mm-dd 12h time format (e.g., 2026-03-13-06-44pm)
    now_str = datetime.now().strftime("%Y-%m-%d-%I-%M%p").lower()
    filename = f"drizzle-schema-report-{now_str}.md"

    # User's custom report location (preserving original spelling of 'hygeine')
    default_report_path = os.path.join(os.getcwd(), "scripts", "reports", "hygeine", filename)

    parser = argparse.ArgumentParser(description="Analyze Drizzle ORM schema and D1 usage.")
    parser.add_argument("--output", default=default_report_path, help="Output Markdown file path")
    args = parser.parse_args()

    # Ensure the target directory exists before executing the file scan.
    # A bare filename has an empty dirname, and os.makedirs('') raises, so
    # only create a directory when there is one to create.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        try:
            os.makedirs(output_dir, exist_ok=True)
        except OSError as e:
            print(f"❌ Failed to write report: {e}")
            sys.exit(1)

    root_dir = os.getcwd()
    sources = _read_sources(get_ts_files(root_dir))

    # 1. Extract all Drizzle Table definitions
    tables = _extract_tables(sources, root_dir)

    # 2. Scan files for table imports and D1 database interactions
    file_interactions, db1_map, db2_map = _map_table_usage(sources, tables, root_dir)

    # 3. Generate the Markdown Report
    report = _render_report(tables, file_interactions, db1_map, db2_map)

    # 4. Write to disk
    try:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(report)
    except OSError as e:
        print(f"❌ Failed to write report: {e}")
        sys.exit(1)
    print(f"✅ Schema analysis complete! Report generated at: {args.output}")
| 163 | + |
# Run the analysis only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
0 commit comments