-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdebug-entity-detection.ts
More file actions
103 lines (86 loc) · 3.43 KB
/
debug-entity-detection.ts
File metadata and controls
103 lines (86 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env bun
/**
* Debug Entity Type Detection
* Test the entity type detection logic to see why NL queries fail
*/
// Adapted from the orchestrator's identifyEntityType.
// NOTE(review): this copy corrects stop-word filtering and plural stripping;
// port the same fixes to the orchestrator if it still carries the older logic.

/**
 * Patterns tried in order. Capture group 1 is the candidate entity word.
 * The query is lower-cased before matching, so the `i` flag is only a safeguard.
 */
const ENTITY_PATTERNS: readonly RegExp[] = [
  // verb + entity, e.g. "find users", "list products"
  // NOTE(review): there is no leading \b, so "widget users" also matches via "get".
  /(?:find|show|list|get|search)\s+([a-z]+s?)\b/i,
  // entity + qualifier, e.g. "users with ...", "products containing ..."
  /\b([a-z]+s?)\s+(?:that|with|containing|having)\b/i,
  // catch-all: the first alphabetic word (optionally possessive) that is
  // followed by whitespace and another alphabetic word
  /\b([a-z]+s?)'?s?\s+[a-z]+/i,
];

/** Function words that must never be reported as an entity type. */
const EXCLUDED_ENTITY_WORDS: ReadonlySet<string> = new Set([
  'this', 'that', 'the', 'and', 'or', 'but', 'with', 'from', 'by', 'at', 'in',
  'on', 'to', 'for', 'of', 'as', 'is', 'are', 'was', 'were', 'be', 'been',
  'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
  'should', 'may', 'might', 'can', 'all', 'any', 'some', 'more', 'most',
  'less', 'least', 'first', 'last', 'next', 'previous',
]);

/** Plurals formed by adding "es" to a sibilant stem: classes, dishes, matches, boxes. */
const SIBILANT_PLURAL = /(?:ss|sh|ch|x)es$/;

/**
 * Naive English singularization for a lower-cased word.
 *
 * - "categories" -> "category"
 * - "boxes" / "classes" -> "box" / "class"
 * - "images" / "users" -> "image" / "user"
 * - words ending in "ss" ("address") and words of three letters or fewer are
 *   left untouched
 *
 * This is a heuristic; irregular plurals (e.g. "movies", "statuses") are not
 * handled correctly.
 */
function singularizeEntityWord(word: string): string {
  if (word.endsWith('ies')) {
    return word.slice(0, -3) + 'y';
  }
  if (SIBILANT_PLURAL.test(word)) {
    return word.slice(0, -2);
  }
  if (word.endsWith('s') && !word.endsWith('ss') && word.length > 3) {
    return word.slice(0, -1);
  }
  return word;
}

/**
 * Guess the singular entity type a natural-language query refers to.
 *
 * Each pattern in ENTITY_PATTERNS is tried in order against the lower-cased
 * query. The first captured word that is not a stop word, and whose singular
 * form is longer than two characters, is returned.
 *
 * @param query - Free-form query text, e.g. "users with gmail emails".
 * @returns The singularized entity word, or the fallback `'item'` when no
 *   pattern yields a usable word. The declared type allows `null` for
 *   compatibility with existing callers, but this implementation never
 *   returns it.
 */
function identifyEntityType(query: string): string | null {
  const normalizedQuery = query.toLowerCase();

  for (const pattern of ENTITY_PATTERNS) {
    const match = normalizedQuery.match(pattern);
    const candidate = match ? match[1] : undefined;

    // Filter stop words BEFORE stripping plurals; otherwise "this" would be
    // truncated to "thi" and slip past the exclusion list.
    if (!candidate || EXCLUDED_ENTITY_WORDS.has(candidate)) {
      continue;
    }

    const entityType = singularizeEntityWord(candidate);
    // Re-check after singularizing so a plural that reduces to a stop word
    // (e.g. "cans" -> "can") is also rejected.
    if (entityType.length > 2 && !EXCLUDED_ENTITY_WORDS.has(entityType)) {
      return entityType;
    }
  }

  return 'item';
}
// Queries listed as failing entity detection.
const failingQueries = [
  "users with gmail emails",
  "events in 2024",
  "active users in engineering",
  "products containing iPhone"
];

// Queries listed as working.
const workingQueries = [
  "products between 400 and 1000 dollars",
  "users skilled in Python"
];

// Labelled patterns used for the per-pattern trace printed under each query.
// Defined once so the failing and working reports cannot drift apart.
const debugPatterns = [
  { name: "find/show/list pattern", regex: /(?:find|show|list|get|search)\s+([a-z]+s?)\b/i },
  { name: "entity + with/containing pattern", regex: /\b([a-z]+s?)\s+(?:that|with|containing|having)\b/i },
  { name: "possessive pattern", regex: /\b([a-z]+s?)'?s?\s+[a-z]+/i }
];

/**
 * Print a heading, then for every query print the detected entity type
 * followed by a per-pattern trace showing which patterns matched and what
 * each one captured. A blank line separates queries.
 *
 * @param heading - Line printed before the group of queries.
 * @param queries - Queries to run through identifyEntityType.
 */
function reportQueries(heading: string, queries: readonly string[]): void {
  console.log(heading);
  for (const query of queries) {
    const entityType = identifyEntityType(query);
    console.log(`"${query}" → entity type: "${entityType}"`);

    // Test each pattern individually against the lower-cased query.
    const normalizedQuery = query.toLowerCase();
    for (const pattern of debugPatterns) {
      const match = normalizedQuery.match(pattern.regex);
      if (match) {
        console.log(` ✓ Matched "${pattern.name}": captured "${match[1]}"`);
      } else {
        console.log(` ✗ No match for "${pattern.name}"`);
      }
    }
    console.log();
  }
}

console.log("🔍 ENTITY TYPE DETECTION DEBUG\n");
reportQueries("❌ FAILING QUERIES:", failingQueries);
reportQueries("✅ WORKING QUERIES:", workingQueries);