-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdebug_ai_parsing.py
More file actions
298 lines (233 loc) · 9.25 KB
/
debug_ai_parsing.py
File metadata and controls
298 lines (233 loc) · 9.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
"""Debug script for testing AI parsing service functionality."""
import asyncio
from dotenv import load_dotenv
from src.code_search.infrastructure.configuration import get_api_key
from src.code_search.infrastructure.ai.semantic_kernel import SemanticKernelCodeParsingService
from src.code_search.domain.models import CodeMember, MemberType
# Sample code snippets for testing.
#
# Python sample: a UserService class (constructor, two async methods, one sync
# email validator) followed by a DatabaseConnection class. main() feeds it to
# the parser under the file name "user_service.py", and
# test_parsing_performance() repeats it five times to build a larger input.
# The literal is a regular (non-raw) string, so the embedded docstring quotes
# and the regex backslashes are escaped.
SAMPLE_PYTHON_CODE = """
class UserService:
\"\"\"Service for managing user operations.\"\"\"
def __init__(self, db_connection):
self.db = db_connection
async def get_user_by_id(self, user_id: int):
\"\"\"Retrieve a user by their ID.\"\"\"
return await self.db.fetch_one("SELECT * FROM users WHERE id = ?", user_id)
async def create_user(self, user_data: dict):
\"\"\"Create a new user.\"\"\"
return await self.db.execute("INSERT INTO users...", user_data)
def validate_email(self, email: str) -> bool:
\"\"\"Validate email format.\"\"\"
import re
pattern = r'^[\\w\\.-]+@[\\w\\.-]+\\.\\w+$'
return bool(re.match(pattern, email))
class DatabaseConnection:
\"\"\"Database connection wrapper.\"\"\"
async def connect(self):
\"\"\"Establish database connection.\"\"\"
pass
async def disconnect(self):
\"\"\"Close database connection.\"\"\"
pass
"""
# C# sample: an IUserRepository interface, a UserService class with a
# constructor and three methods, and a UserRole enum. main() feeds it to the
# parser under the file name "UserService.cs". The regex backslashes are
# doubled because this is a regular (non-raw) Python string.
SAMPLE_CSHARP_CODE = """
public interface IUserRepository
{
Task<User> GetByIdAsync(int userId);
Task<User> CreateAsync(User user);
Task UpdateAsync(User user);
Task DeleteAsync(int userId);
}
public class UserService : IUserService
{
private readonly IUserRepository _userRepository;
private readonly ILogger<UserService> _logger;
public UserService(IUserRepository userRepository, ILogger<UserService> logger)
{
_userRepository = userRepository;
_logger = logger;
}
public async Task<User> GetUserByIdAsync(int userId)
{
_logger.LogInformation("Getting user with ID: {UserId}", userId);
return await _userRepository.GetByIdAsync(userId);
}
public async Task<User> CreateUserAsync(CreateUserRequest request)
{
var user = new User
{
Name = request.Name,
Email = request.Email
};
return await _userRepository.CreateAsync(user);
}
public bool ValidateEmail(string email)
{
var regex = new Regex(@"^[\\w\\.-]+@[\\w\\.-]+\\.\\w+$");
return regex.IsMatch(email);
}
}
public enum UserRole
{
Admin,
User,
Guest
}
"""
# TypeScript sample: a UserRepository interface, a UserService class with a
# constructor and three methods, and a string-valued UserRole enum. main()
# feeds it to the parser under the file name "userService.ts". This is a plain
# (non-f) string, so the `${userId}` template placeholder is kept literally;
# the regex backslashes are doubled because the string is not raw.
SAMPLE_TYPESCRIPT_CODE = """
interface UserRepository {
getUserById(userId: number): Promise<User>;
createUser(user: CreateUserRequest): Promise<User>;
updateUser(user: User): Promise<void>;
deleteUser(userId: number): Promise<void>;
}
class UserService implements IUserService {
private userRepository: UserRepository;
private logger: Logger;
constructor(userRepository: UserRepository, logger: Logger) {
this.userRepository = userRepository;
this.logger = logger;
}
async getUserById(userId: number): Promise<User> {
this.logger.info(`Getting user with ID: ${userId}`);
return await this.userRepository.getUserById(userId);
}
async createUser(request: CreateUserRequest): Promise<User> {
const user: User = {
name: request.name,
email: request.email,
role: UserRole.User
};
return await this.userRepository.createUser(user);
}
validateEmail(email: string): boolean {
const regex = /^[\\w\\.-]+@[\\w\\.-]+\\.\\w+$/;
return regex.test(email);
}
}
enum UserRole {
Admin = "admin",
User = "user",
Guest = "guest"
}
"""
async def test_ai_parsing_service(ai_service: SemanticKernelCodeParsingService, code: str, file_path: str, language: str):
    """Run the AI parser on one code sample and print a report of the result.

    Args:
        ai_service: Service whose ``parse_code_to_members`` coroutine is awaited.
        code: Source text handed to the parser.
        file_path: File name passed to the parser alongside the code.
        language: Label used only in the printed header.

    Returns:
        The members returned by the service, or an empty list when parsing or
        reporting raised an exception (the error and its traceback are printed).
    """
    max_methods_shown = 3

    print(f"\n🤖 Testing AI parsing - {language}")
    print(f"📁 File: {file_path}")
    print("-" * 60)

    try:
        parsed = await ai_service.parse_code_to_members(code, file_path)
        print(f"✅ Parsing successful - Found {len(parsed)} members")

        # One report stanza per parsed member.
        for position, entry in enumerate(parsed, start=1):
            print(f"\n {position}. {entry.type.value.upper()}: {entry.name}")
            print(f" 📝 Summary: {entry.summary}")
            print(f" 🔒 Hash: {entry.content_hash[:16]}...")
            print(f" ⚙️ Methods: {len(entry.methods)}")

            # List only the first few methods to keep the output readable.
            for number, method in enumerate(entry.methods[:max_methods_shown], start=1):
                print(f" {number}. {method.name}: {method.summary}")

            # Mention how many methods were left out, if any.
            if len(entry.methods) > max_methods_shown:
                print(f" ... and {len(entry.methods) - max_methods_shown} more methods")
    except Exception as error:
        # Debug helper: report the failure and keep the session going.
        print(f"❌ Parsing failed: {error}")
        import traceback
        traceback.print_exc()
        return []

    return parsed
async def test_parsing_edge_cases(ai_service: SemanticKernelCodeParsingService):
    """Feed three awkward inputs to the parser and print the member count of each.

    The inputs are an empty string, a file containing only comments, and a
    snippet with deliberately broken syntax. Exceptions raised by the service
    are not caught here.

    Args:
        ai_service: Service whose ``parse_code_to_members`` coroutine is awaited.
    """
    # The literal bodies start at column 0 on purpose: their text is part of
    # the input sent to the parser.
    comments_only_source = """
# This is just a comment file
# No actual code here
# Just testing parsing
"""
    malformed_source = """
class BrokenClass
def method_without_colon()
return "this won't parse"
class AnotherClass:
def valid_method(self):
return "this should parse"
"""

    # (heading, source text, file name) for each scenario, in report order.
    scenarios = (
        ("Empty file", "", "empty.py"),
        ("Comments only", comments_only_source, "comments.py"),
        ("Malformed code", malformed_source, "malformed.py"),
    )

    print("\n🧪 Testing edge cases")
    print("-" * 40)

    for number, (heading, source, file_name) in enumerate(scenarios, start=1):
        print(f"\n{number}. {heading}:")
        found = await ai_service.parse_code_to_members(source, file_name)
        print(f" Result: {len(found)} members")
async def test_parsing_performance(ai_service: SemanticKernelCodeParsingService):
    """Time a single parse of a larger input and print size, time and throughput.

    The input is ``SAMPLE_PYTHON_CODE`` repeated five times. Exceptions raised
    by the service are not caught here.

    Args:
        ai_service: Service whose ``parse_code_to_members`` coroutine is awaited.
    """
    import time

    print("\n⏱️ Testing parsing performance")
    print("-" * 40)

    # Create a larger code sample
    large_code = SAMPLE_PYTHON_CODE * 5  # Repeat 5 times

    # perf_counter() is monotonic and high-resolution, so the interval cannot
    # go negative or be distorted by system clock adjustments.
    started = time.perf_counter()
    members = await ai_service.parse_code_to_members(large_code, "large_file.py")
    elapsed_ms = (time.perf_counter() - started) * 1000

    print(f" 📏 Code size: {len(large_code)} characters")
    print(f" ⏱️ Parsing time: {elapsed_ms:.1f}ms")
    print(f" 📊 Members found: {len(members)}")

    # Guard the division: a call that returns faster than the timer can
    # resolve would otherwise raise ZeroDivisionError.
    if elapsed_ms > 0:
        print(f" 🚀 Speed: {len(large_code) / elapsed_ms * 1000:.0f} chars/second")
    else:
        print(" 🚀 Speed: n/a (elapsed time below timer resolution)")
def _pluralize(word: str) -> str:
    """Return a simple English plural of *word* for the summary labels."""
    if word.endswith(("s", "x", "z", "ch", "sh")):
        return f"{word}es"
    if len(word) > 1 and word.endswith("y") and word[-2] not in "aeiou":
        return f"{word[:-1]}ies"
    return f"{word}s"


async def main():
    """Debug AI parsing functionality.

    Loads environment variables, builds and initializes the parsing service,
    runs the language samples, the edge cases and the performance check, then
    prints a summary of member and method counts.

    Raises:
        Exception: Any error raised during the session is printed with its
            traceback and then re-raised.
    """
    load_dotenv()

    try:
        # Get API key
        api_key = get_api_key()

        print("🔧 DEBUG MODE: AI Parsing Service")
        print(f"🔑 API key configured: {'Yes' if api_key else 'No'}")

        # Initialize AI parsing service
        ai_service = SemanticKernelCodeParsingService(api_key, model_name="gpt-4o")
        await ai_service.initialize()

        print("\n" + "="*60)
        print("🤖 AI PARSING DEBUG TESTS")
        print("="*60)

        # Test different languages
        test_cases = [
            (SAMPLE_PYTHON_CODE, "user_service.py", "Python"),
            (SAMPLE_CSHARP_CODE, "UserService.cs", "C#"),
            (SAMPLE_TYPESCRIPT_CODE, "userService.ts", "TypeScript")
        ]

        all_members = []
        for code, file_path, language in test_cases:
            members = await test_ai_parsing_service(ai_service, code, file_path, language)
            all_members.extend(members)

        # Test edge cases
        await test_parsing_edge_cases(ai_service)

        # Test performance
        await test_parsing_performance(ai_service)

        # Summary
        print("\n📊 Summary")
        print("-" * 25)
        print(f" Total members parsed: {len(all_members)}")

        # Count by type
        type_counts = {}
        method_count = 0
        for member in all_members:
            type_name = member.type.value
            type_counts[type_name] = type_counts.get(type_name, 0) + 1
            method_count += len(member.methods)

        # Pluralize per word instead of always appending "es", which only
        # produced a correct label for "class".
        for type_name, count in type_counts.items():
            print(f" {_pluralize(type_name).title()}: {count}")
        print(f" Total methods: {method_count}")

        print("\n✅ AI parsing debug session completed!")
    except Exception as e:
        # Top-level boundary: report the failure, then let it propagate.
        print(f"❌ AI Parsing Debug Error: {e}")
        import traceback
        traceback.print_exc()
        raise
# Script entry point: run the async debug session only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())