-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_data_generator.py
More file actions
87 lines (70 loc) · 2.63 KB
/
test_data_generator.py
File metadata and controls
87 lines (70 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3
"""
Test data generator - generate realistic test data for databases, APIs, forms, etc.
"""
import asyncio
import sys
from copilot import CopilotClient
async def main():
"""Generate realistic test data using Copilot."""
if len(sys.argv) < 3:
print("Usage: python test_data_generator.py <schema> <count> [format]")
print("\nFormats:")
print(" json - JSON array (default)")
print(" sql - SQL INSERT statements")
print(" csv - CSV format")
print(" python - Python list/dict")
print("\nSchema examples:")
print(" user, product, order, customer")
print(' custom: "name:str,age:int,email:str"')
print("\nExamples:")
print(" python test_data_generator.py user 10")
print(" python test_data_generator.py product 50 sql")
print(' python test_data_generator.py "name:str,email:str" 20 json')
sys.exit(1)
schema = sys.argv[1]
try:
count = int(sys.argv[2])
except ValueError:
print("❌ Error: Count must be a number")
sys.exit(1)
format_type = sys.argv[3].lower() if len(sys.argv) > 3 else "json"
print("🎲 Test Data Generator")
print(f"📋 Schema: {schema}")
print(f"🔢 Count: {count}")
print(f"📄 Format: {format_type}\n")
client = CopilotClient()
await client.start()
try:
session = await client.create_session({"model": "gpt-5-mini"})
format_specs = {
'json': "JSON array of objects",
'sql': "SQL INSERT statements for a table",
'csv': "CSV format with header row",
'python': "Python list of dictionaries"
}
prompt = f"""Generate {count} realistic test data records.
Schema: {schema}
Format: {format_type} ({format_specs.get(format_type, format_specs['json'])})
Requirements:
1. Realistic data (proper names, valid emails, reasonable values)
2. Diverse data (different ages, locations, etc.)
3. Proper formatting for {format_type}
4. No duplicates
Common schemas:
- user: id, name, email, age, country
- product: id, name, price, category, stock
- order: id, user_id, product_id, quantity, total, date
- customer: id, first_name, last_name, email, phone, address
Generate the data now:"""
print("🤖 Generated Data:\n")
print("-" * 80)
response = await session.send_and_wait({"prompt": prompt})
print(response.data.content)
print("-" * 80)
print("\n✅ Generation complete!")
await session.destroy()
finally:
await client.stop()
# Script entry point: drive the async main() to completion on a fresh event
# loop, but only when this file is executed directly rather than imported.
if __name__ == "__main__":
    asyncio.run(main())