Skip to content

Commit 2787eba

Browse files
committed
feat: improve mega-query test with concrete score assertions and better seed data
- Richer seed body texts for dramatic BM25 score separation
- Semantic vector embeddings clustered by category (restaurant/park/museum)
- Concrete assertions: BM25 ranking order, trgm similarity thresholds, tsvRank ranges
- Fix vectorEmbedding filter shape: use flat {vector, distance} (not nested nearby/embedding)
- 3 new pgvector cluster tests (restaurant, park, museum directions)
- searchScore composite signal test combining tsvector + BM25 + trgm
- 52 tests total, all passing
1 parent 3be2036 commit 2787eba

2 files changed

Lines changed: 295 additions & 100 deletions

File tree

graphql/orm-test/__fixtures__/seed/mega-seed.sql

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -111,55 +111,64 @@ INSERT INTO mega_test.categories (id, name, description) VALUES
111111
(3, 'Museums', 'Cultural institutions');
112112

113113
-- Locations
114+
--
115+
-- Embedding semantics: dim 0 = restaurant/food, dim 1 = park/nature, dim 2 = museum/art.
116+
-- This gives vector queries meaningful cluster behaviour.
117+
--
118+
-- Body texts are tuned so BM25 produces clear score separation:
119+
-- • "park" query → Prospect Park dominates (3× "park" in a short body)
120+
-- • "museum art" → MoMA edges out Met Museum (more concentrated terms)
121+
-- • "cafe coffee" → Central Park Cafe is the sole match
122+
--
114123
INSERT INTO mega_test.locations (id, name, body, category_id, geom, embedding, tsv, is_active, rating) VALUES
115124
(1, 'Central Park Cafe',
116-
'A cozy cafe in the heart of Central Park serving organic coffee and pastries',
125+
'A cozy cafe in the heart of Central Park with organic coffee, fresh pastries, and beautiful park-side seating',
117126
1,
118127
ST_SetSRID(ST_MakePoint(-73.968, 40.785), 4326),
119128
'[1, 0, 0]',
120-
to_tsvector('english', 'cozy cafe central park organic coffee pastries'),
129+
to_tsvector('english', 'cozy cafe central park organic coffee fresh pastries beautiful park-side seating'),
121130
true, 4.5),
122131
(2, 'Brooklyn Bridge Park',
123-
'A scenic waterfront park with stunning views of the Manhattan skyline',
132+
'A scenic waterfront park with stunning views of the Manhattan skyline and playgrounds along the East River',
124133
2,
125134
ST_SetSRID(ST_MakePoint(-73.996, 40.698), 4326),
126135
'[0, 1, 0]',
127-
to_tsvector('english', 'scenic waterfront park stunning views manhattan skyline'),
136+
to_tsvector('english', 'scenic waterfront park stunning views manhattan skyline playgrounds east river'),
128137
true, 4.8),
129138
(3, 'MoMA',
130-
'The Museum of Modern Art featuring contemporary and modern art collections',
139+
'The Museum of Modern Art is a world-renowned institution of contemporary art and modern art, housing paintings, sculptures, film, and groundbreaking design',
131140
3,
132141
ST_SetSRID(ST_MakePoint(-73.978, 40.761), 4326),
133142
'[0, 0, 1]',
134-
to_tsvector('english', 'museum modern art contemporary collections'),
143+
to_tsvector('english', 'museum modern art world-renowned institution contemporary art modern art paintings sculptures film groundbreaking design'),
135144
true, 4.7),
136145
(4, 'Times Square Diner',
137-
'A classic American diner near Times Square open twenty four hours',
146+
'A classic American diner in the bustling heart of Times Square serving breakfast around the clock',
138147
1,
139148
ST_SetSRID(ST_MakePoint(-73.985, 40.758), 4326),
140-
'[0.707, 0.707, 0]',
141-
to_tsvector('english', 'classic american diner times square twenty four hours'),
149+
'[0.8, 0.6, 0]',
150+
to_tsvector('english', 'classic american diner bustling heart times square serving breakfast around clock'),
142151
false, 3.2),
143152
(5, 'High Line Park',
144-
'An elevated linear park built on historic freight rail lines above the streets',
153+
'An elevated linear park on a historic freight rail line with gardens, art installations, and sweeping Hudson River views',
145154
2,
146155
ST_SetSRID(ST_MakePoint(-74.005, 40.748), 4326),
147-
'[0.577, 0.577, 0.577]',
148-
to_tsvector('english', 'elevated linear park historic freight rail lines streets'),
156+
'[0.3, 0.7, 0.3]',
157+
to_tsvector('english', 'elevated linear park historic freight rail line gardens art installations sweeping hudson river views'),
149158
true, 4.9),
150159
(6, 'Met Museum',
151-
'The Metropolitan Museum of Art with encyclopedic art collections spanning five thousand years',
160+
'The Metropolitan Museum of Art spans five thousand years of art from every corner of the world, with over two million works of art in its collection',
152161
3,
153162
ST_SetSRID(ST_MakePoint(-73.963, 40.779), 4326),
154-
'[0, 0.707, 0.707]',
155-
to_tsvector('english', 'metropolitan museum art encyclopedic collections spanning five thousand years'),
163+
'[0.1, 0.1, 0.9]',
164+
to_tsvector('english', 'metropolitan museum art five thousand years art every corner world two million works art collection'),
156165
true, 4.6),
157166
(7, 'Prospect Park',
158-
'A large public park in Brooklyn designed by the creators of Central Park',
167+
'A sprawling urban park designed by Olmsted and Vaux, this beloved Brooklyn park features a lake, meadows, wooded ravines, and miles of park trails',
159168
2,
160169
ST_SetSRID(ST_MakePoint(-73.969, 40.660), 4326),
161-
'[0.333, 0.333, 0.333]',
162-
to_tsvector('english', 'large public park brooklyn creators central park'),
170+
'[0.1, 0.9, 0.1]',
171+
to_tsvector('english', 'sprawling urban park designed olmsted vaux beloved brooklyn park features lake meadows wooded ravines miles park trails'),
163172
true, NULL);
164173

165174
-- Tags (backward relation filter)

0 commit comments

Comments
 (0)