Skip to content

Commit 4580ba2

Browse files
committed
Nested subqueries
1 parent ba1ee9c commit 4580ba2

10 files changed

Lines changed: 1232 additions & 64 deletions

File tree

bin/bench_m2m.php

Lines changed: 335 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,335 @@
1+
<?php
2+
3+
/**
4+
* Standalone M2M Relationship Query Benchmark
5+
*
6+
* Seeds data and benchmarks M2M relationship query patterns.
7+
* Run with: php bin/bench_m2m.php
8+
*
9+
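* Requires MariaDB (docker compose up -d mariadb). The script connects to
* mariadb:3306 by default; set MARIADB_HOST / MARIADB_PORT to override
* (e.g. localhost:8703).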
10+
*/
11+
12+
ini_set('memory_limit', '2G');
13+
14+
require_once __DIR__ . '/../vendor/autoload.php';
15+
16+
use Utopia\Cache\Adapter\None as NoCache;
17+
use Utopia\Cache\Cache;
18+
use Utopia\Database\Adapter\MariaDB;
19+
use Utopia\Database\Adapter\SQL;
20+
use Utopia\Database\Database;
21+
use Utopia\Database\Document;
22+
use Utopia\Database\Helpers\Permission;
23+
use Utopia\Database\Helpers\Role;
24+
use Utopia\Database\PDO;
25+
use Utopia\Database\Query;
26+
use Utopia\Database\Validator\Authorization;
27+
28+
// --- Config ---
// Connection target: an environment variable wins when it is set to a truthy
// value, otherwise the hard-coded default is used.
$envHost = getenv('MARIADB_HOST');
$envPort = getenv('MARIADB_PORT');
$dbHost = $envHost ? $envHost : 'mariadb';
$dbPort = $envPort ? $envPort : '3306';

// Workload size: 500 authors * 50 articles each = 25000 articles in total.
[$numAuthors, $numArticlesPerAuthor] = [500, 50];

// Each benchmark performs $warmup untimed calls followed by $runs timed calls.
[$warmup, $runs] = [3, 20];

// Database name and namespace used for this benchmark run.
[$dbName, $namespace] = ['bench_m2m', '_bench'];

// --- Setup ---
echo "=== M2M Relationship Query Benchmark ===\n\n";

$dsn = 'mysql:host=' . $dbHost . ';port=' . $dbPort . ';charset=utf8mb4';
$pdo = new PDO($dsn, 'root', 'password', SQL::getPDOAttributes());

// The no-op cache adapter is used so results are not served from a cache.
$cache = new Cache(new NoCache());

$authorization = new Authorization();
$authorization->addRole(Role::any()->toString());
$authorization->setDefaultStatus(true);

$adapter = new MariaDB($pdo);
$database = new Database($adapter, $cache);
$database
    ->setAuthorization($authorization)
    ->setDatabase($dbName)
    ->setNamespace($namespace);

// Fresh database: drop whatever an earlier run left behind, then recreate it.
$alreadyExists = $database->exists($dbName);
if ($alreadyExists) {
    $database->delete($dbName);
}
$database->create();
64+
65+
// Schema
// Collection ID => IDs of its string attributes. Every attribute is created as
// Database::VAR_STRING with size 256 and the trailing flag set to true.
$collectionAttributes = [
    'authors' => ['name'],
    'articles' => ['title', 'genre'],
];

foreach ($collectionAttributes as $collectionId => $attributeIds) {
    // Collection-level permissions: the "any" role may create and read.
    $database->createCollection($collectionId, permissions: [
        Permission::create(Role::any()),
        Permission::read(Role::any()),
    ]);

    foreach ($attributeIds as $attributeId) {
        $database->createAttribute($collectionId, $attributeId, Database::VAR_STRING, 256, true);
    }
}

// Key index on the attribute used by the genre filter benchmarks.
$database->createIndex('articles', 'idx_genre', Database::INDEX_KEY, ['genre']);

// Many-to-many link between authors and articles.
// NOTE(review): the positional `true` is presumably the two-way flag (the
// reverse benchmark queries 'authors.$id' from articles) — confirm against
// the createRelationship() signature.
$relationType = Database::RELATION_MANY_TO_MANY;
$database->createRelationship('authors', 'articles', $relationType, true, onDelete: Database::RELATION_MUTATE_SET_NULL);
87+
88+
// --- Seed Data ---
// Creates $numAuthors author documents; each one carries $numArticlesPerAuthor
// nested article documents in its 'articles' attribute. Every article ID is
// also collected in $allArticleIds for use by the benchmarks below.
$totalArticles = $numAuthors * $numArticlesPerAuthor;
echo "Seeding: {$numAuthors} authors x {$numArticlesPerAuthor} articles = {$totalArticles} total articles\n";

$genres = ['fashion', 'food', 'travel', 'music', 'lifestyle', 'fitness', 'diy', 'sports', 'finance'];
$names = ['Alice', 'Bob', 'Carol', 'Dave', 'Eve', 'Frank', 'Grace', 'Heidi', 'Ivan', 'Judy'];

$allArticleIds = [];

for ($a = 0; $a < $numAuthors; $a++) {
    // Zero-padded author index, shared by the author ID and its article IDs.
    $authorKey = str_pad((string) $a, 3, '0', STR_PAD_LEFT);

    $articles = [];
    for ($i = 0; $i < $numArticlesPerAuthor; $i++) {
        $articleId = 'art_' . $authorKey . '_' . str_pad((string) $i, 3, '0', STR_PAD_LEFT);
        $articles[] = new Document([
            '$id' => $articleId,
            'title' => 'Article ' . ($i + 1) . ' by Author ' . $a,
            // Genre is picked at random, so the genre distribution differs between runs.
            'genre' => $genres[array_rand($genres)],
        ]);
        $allArticleIds[] = $articleId;
    }

    $database->createDocument('authors', new Document([
        '$id' => 'author_' . $authorKey,
        'name' => $names[$a % count($names)] . ' ' . $a,
        'articles' => $articles,
        '$permissions' => ['read("any")'],
    ]));

    // Progress every 10 authors. Report the number created so far ($a is
    // zero-based, so the count is $a + 1).
    $created = $a + 1;
    if ($created % 10 === 0) {
        echo " Created {$created}/{$numAuthors} authors...\n";
    }
}

echo " Seeding complete: " . count($allArticleIds) . " articles, {$numAuthors} authors\n\n";
122+
123+
// --- Benchmark ---
124+
/**
 * Time a callable and print one summary line of latency statistics.
 *
 * Calls $fn $warmup times without timing, then $runs times with hrtime()
 * around each call, and prints median, mean, min and p95 in milliseconds
 * together with the element count of the last result.
 *
 * @param string   $label  Text shown in the first column of the report line.
 * @param callable $fn     Operation under test; called with no arguments.
 * @param int      $warmup Number of untimed calls made before measuring.
 * @param int      $runs   Number of timed calls; must be at least 1.
 *
 * @throws \InvalidArgumentException If $runs is less than 1.
 */
function bench(string $label, callable $fn, int $warmup, int $runs): void
{
    // Without at least one sample there is nothing to index or average.
    if ($runs < 1) {
        throw new \InvalidArgumentException('bench() requires at least one measured run.');
    }

    for ($i = 0; $i < $warmup; $i++) {
        $fn();
    }

    $times = [];
    $resultCount = 0;
    for ($i = 0; $i < $runs; $i++) {
        $start = hrtime(true);
        $result = $fn();
        $times[] = (hrtime(true) - $start) / 1e6; // nanoseconds -> milliseconds
        // Counted after the timed span; non-countable results are reported as 0.
        $resultCount = is_countable($result) ? count($result) : 0;
    }

    sort($times);
    $n = count($times);

    // Median: the middle sample for odd $n, the mean of the two middle samples for even $n.
    $mid = intdiv($n, 2);
    $median = $n % 2 === 1
        ? $times[$mid]
        : ($times[$mid - 1] + $times[$mid]) / 2;

    $mean = array_sum($times) / $n;
    $min = $times[0];

    // Nearest-rank 95th percentile: rank = ceil(0.95 * n), computed with
    // integers so float rounding cannot push the index onto the maximum.
    $p95 = $times[intdiv($n * 95 + 99, 100) - 1];

    printf(
        " %-45s median: %7.1f ms mean: %7.1f ms min: %7.1f ms p95: %7.1f ms (%d docs)\n",
        $label,
        $median,
        $mean,
        $min,
        $p95,
        $resultCount
    );
}
}
153+
154+
echo "Benchmarking ({$warmup} warmup + {$runs} measured runs each):\n\n";

// =======================================
// GENERAL QUERIES (no relationship traversal)
// =======================================
echo "--- General Queries (plain find, no relationship traversal) ---\n\n";

// getDocument: fetch the first seeded article by ID.
bench(
    "getDocument('articles', id)",
    fn () => $database->getDocument('articles', $allArticleIds[0]),
    $warmup,
    $runs
);

// skipRelationships find (raw, no population) at increasing page sizes.
foreach ([100, 1000, 5000] as $limit) {
    bench(
        "find('articles') skip-rels limit({$limit})",
        fn () => $database->skipRelationships(
            fn () => $database->find('articles', [Query::limit($limit)])
        ),
        $warmup,
        $runs
    );
}

// find WITH relationship population:
//  - articles: authors populated on each article
//  - authors:  articles populated on each author
$populatedFinds = [
    'articles' => [100, 500],
    'authors' => [25, 100],
];
foreach ($populatedFinds as $collection => $limits) {
    foreach ($limits as $limit) {
        bench(
            "find('{$collection}') + rels limit({$limit})",
            fn () => $database->find($collection, [Query::limit($limit)]),
            $warmup,
            $runs
        );
    }
}

// Filter queries (no relationship traversal)
bench(
    "find('articles') genre='fashion' skip-rels",
    fn () => $database->skipRelationships(fn () => $database->find('articles', [
        Query::equal('genre', ['fashion']),
        Query::limit(5000),
    ])),
    $warmup,
    $runs
);

// Pagination: walk the whole collection 100 documents at a time, passing the
// last document of each page as the cursor for the next one.
bench(
    "paginate('articles') skip-rels 100/page",
    function () use ($database) {
        $cursor = null;
        $total = 0;
        do {
            $queries = [Query::limit(100)];
            if ($cursor !== null) {
                $queries[] = Query::cursorAfter($cursor);
            }
            $docs = $database->skipRelationships(fn () => $database->find('articles', $queries));
            $count = count($docs);
            $total += $count;
            if ($count > 0) {
                $cursor = $docs[$count - 1];
            }
        } while ($count === 100); // a short page means the end was reached

        // bench() reports count($result), so return a list with exactly $total
        // entries. array_fill() yields [] for 0, whereas range(1, 0) would
        // yield [1, 0] and report 2 documents for an empty collection.
        return array_fill(0, $total, null);
    },
    $warmup,
    $runs
);
241+
242+
echo "\n--- M2M Relationship Queries ---\n\n";

// Pick IDs: the first three seeded article IDs feed the filters below.
[$firstArticleId, $secondArticleId, $thirdArticleId] = $allArticleIds;
$singleArticleId = $firstArticleId;
$threeArticleIds = [$firstArticleId, $secondArticleId, $thirdArticleId];
$twoArticleIds = [$firstArticleId, $secondArticleId];

// Each scenario is [label, collection, query builder]. The builder runs inside
// the timed closure, so the query objects are constructed afresh on every call.
$scenarios = [
    // 1) equal('articles.$id', [single])
    ["equal('articles.\$id', [1 val])", 'authors', fn () => [
        Query::equal('articles.$id', [$singleArticleId]),
        Query::limit(5000),
    ]],

    // 2) equal('articles.$id', [3 values])
    ["equal('articles.\$id', [3 vals])", 'authors', fn () => [
        Query::equal('articles.$id', $threeArticleIds),
        Query::limit(5000),
    ]],

    // 3) containsAll('articles.$id', [2 values])
    ["containsAll('articles.\$id', [2 vals])", 'authors', fn () => [
        Query::containsAll('articles.$id', $twoArticleIds),
        Query::limit(5000),
    ]],

    // 4) Reverse: find articles by author
    ["equal('authors.\$id', [1 val]) [reverse]", 'articles', fn () => [
        Query::equal('authors.$id', ['author_000']),
        Query::limit(5000),
    ]],

    // 5) equal('articles.$id', [1]) + attribute filter
    ["equal('articles.\$id', [1]) + equal('name')", 'authors', fn () => [
        Query::equal('articles.$id', [$singleArticleId]),
        Query::equal('name', ['Alice 0']),
        Query::limit(5000),
    ]],

    // 6) Non-$id attribute query (genre) — not optimized by subquery, baseline comparison
    ["equal('articles.genre', ['fashion'])", 'authors', fn () => [
        Query::equal('articles.genre', ['fashion']),
        Query::limit(5000),
    ]],

    // 7) Genre query with select (no relationship population)
    ["equal('articles.genre', ['fashion']) + select", 'authors', fn () => [
        Query::equal('articles.genre', ['fashion']),
        Query::select(['$id', 'name']),
        Query::limit(5000),
    ]],

    // 8) Genre query with small limit
    ["equal('articles.genre', ['fashion']) + limit(5)", 'authors', fn () => [
        Query::equal('articles.genre', ['fashion']),
        Query::limit(5),
    ]],
];

foreach ($scenarios as [$label, $collection, $buildQueries]) {
    bench(
        $label,
        fn () => $database->find($collection, $buildQueries()),
        $warmup,
        $runs
    );
}

echo "\n=== Done ===\n";

// Cleanup: drop the benchmark database created during setup.
$database->delete($dbName);
echo "Database cleaned up.\n";

0 commit comments

Comments
 (0)