Skip to content

Commit dd009a5

Browse files
authored
Merge pull request #859 from utopia-php/CLO-4204-query-fingerprint
feat: add Query::fingerprint() for shape-only query hashing
2 parents ee2d7d4 + f16c7ee commit dd009a5

2 files changed

Lines changed: 203 additions & 0 deletions

File tree

src/Database/Query.php

Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -419,6 +419,102 @@ public static function parseQueries(array $queries): array
419419
return $parsed;
420420
}
421421

422+
/**
 * Hash the value-independent "shape" of a set of queries.
 *
 * Each element is reduced to its `Query::shape()` string (method and
 * attribute, plus nested child shapes for logical queries); parameter
 * values never take part. The shape strings are sorted, joined with `|`
 * and hashed, so the order of the elements in `$queries` does not change
 * the result. An empty array yields `md5('')`.
 *
 * String elements are passed through `self::parse()` first; whatever that
 * call throws for a string it cannot parse propagates to the caller.
 *
 * @param array<mixed> $queries raw query strings and/or Query instances
 * @return string md5 hash of the sorted, `|`-joined shape strings
 * @throws QueryException if an element is neither a string nor a Query
 */
public static function fingerprint(array $queries): string
{
    $parts = \array_map(
        /**
         * @param mixed $item
         */
        static function ($item): string {
            // Strings are parsed into Query objects; everything else is taken as-is.
            $candidate = \is_string($item) ? self::parse($item) : $item;

            if ($candidate instanceof self) {
                return $candidate->shape();
            }

            throw new QueryException('Invalid query element for fingerprint: expected string or Query instance');
        },
        $queries
    );

    // Sorting makes the fingerprint independent of the order of the queries.
    \sort($parts);

    return \md5(\implode('|', $parts));
}
460+
461+
/**
 * Build the canonical, value-free shape string of this Query.
 *
 * A query whose method is not in `self::LOGICAL_TYPES` is rendered as
 * `method:attribute`. A logical query is rendered as
 * `method:attribute(child1|child2|…)`, where the children are the shapes
 * of its Query-instance values, sorted so that child order is irrelevant.
 * Values of a logical query that are not Query instances are ignored.
 *
 * The tree is handled without recursion: an explicit stack first lists
 * every node with each parent ahead of its descendants, and the list is
 * then resolved back-to-front so every child shape exists before the
 * parent that embeds it.
 *
 * @return string
 */
public function shape(): string
{
    // Pass 1: collect every node; a parent is always recorded before its descendants.
    $ordered = [];
    $pending = [$this];

    while ($pending !== []) {
        /** @var self $current */
        $current = \array_pop($pending);
        $ordered[] = $current;

        if (\in_array($current->method, self::LOGICAL_TYPES, true)) {
            foreach ($current->values as $value) {
                if ($value instanceof self) {
                    $pending[] = $value;
                }
            }
        }
    }

    // Pass 2: resolve from the end of the list, so descendants come before their parents.
    $resolved = [];

    for ($index = \count($ordered) - 1; $index >= 0; $index--) {
        $current = $ordered[$index];
        $label = $current->method . ':' . $current->attribute;

        if (\in_array($current->method, self::LOGICAL_TYPES, true)) {
            $inner = [];
            foreach ($current->values as $value) {
                if ($value instanceof self) {
                    $inner[] = $resolved[\spl_object_id($value)];
                }
            }

            \sort($inner);

            // Attribute is empty for and/or; meaningful for elemMatch (the field being matched).
            $label .= '(' . \implode('|', $inner) . ')';
        }

        $resolved[\spl_object_id($current)] = $label;
    }

    return $resolved[\spl_object_id($this)];
}
517+
422518
/**
423519
* @return array<string, mixed>
424520
*/

tests/unit/QueryTest.php

Lines changed: 107 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -468,4 +468,111 @@ public function testNewQueryTypesInTypesArray(): void
468468
$this->assertContains(Query::TYPE_NOT_BETWEEN, Query::TYPES);
469469
$this->assertContains(Query::TYPE_ORDER_RANDOM, Query::TYPES);
470470
}
471+
472+
public function testFingerprint(): void
{
    // Raw JSON query strings used as fingerprint input.
    $nameIsAlice = '{"method":"equal","attribute":"name","values":["Alice"]}';
    $nameIsBob = '{"method":"equal","attribute":"name","values":["Bob"]}';
    $emailIs = '{"method":"equal","attribute":"email","values":["a@b.c"]}';
    $nameIsNotAlice = '{"method":"notEqual","attribute":"name","values":["Alice"]}';
    $ageOver18 = '{"method":"greaterThan","attribute":"age","values":[18]}';
    $ageOver42 = '{"method":"greaterThan","attribute":"age","values":[42]}';

    // Reference fingerprint: equal(name) + greaterThan(age).
    $baseline = Query::fingerprint([$nameIsAlice, $ageOver18]);

    // Changing only the values keeps the fingerprint.
    $this->assertSame($baseline, Query::fingerprint([$nameIsBob, $ageOver42]));

    // Changing an attribute changes the fingerprint.
    $this->assertNotSame($baseline, Query::fingerprint([$emailIs, $ageOver18]));

    // Changing a method changes the fingerprint.
    $this->assertNotSame($baseline, Query::fingerprint([$nameIsNotAlice, $ageOver18]));

    // Reordering the queries keeps the fingerprint.
    $this->assertSame($baseline, Query::fingerprint([$ageOver18, $nameIsAlice]));

    // Already-parsed Query objects are accepted and match their string form.
    $asObjects = [
        Query::equal('name', ['Alice']),
        Query::greaterThan('age', 18),
    ];
    $this->assertSame($baseline, Query::fingerprint($asObjects));

    // No queries at all hashes the empty string.
    $this->assertSame(\md5(''), Query::fingerprint([]));
}
506+
507+
public function testFingerprintNestedLogicalQueries(): void
{
    // Fingerprint of a single-query set.
    $fingerprintOf = static fn (Query $query): string => Query::fingerprint([$query]);

    // Different child shapes inside AND => different fingerprints.
    $andName = Query::and([Query::equal('name', ['Alice'])]);
    $andEmail = Query::and([Query::equal('email', ['a@b.c'])]);
    $this->assertNotSame($fingerprintOf($andName), $fingerprintOf($andEmail));

    // Same child shape with another value => same fingerprint.
    $andNameOtherValue = Query::and([Query::equal('name', ['Bob'])]);
    $this->assertSame($fingerprintOf($andName), $fingerprintOf($andNameOtherValue));

    // Child order inside a logical query is irrelevant.
    $andNameThenAge = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]);
    $andAgeThenName = Query::and([Query::greaterThan('age', 42), Query::equal('name', ['Bob'])]);
    $this->assertSame($fingerprintOf($andNameThenAge), $fingerprintOf($andAgeThenName));

    // The logical operator itself is part of the shape: AND != OR.
    $orNameThenAge = Query::or([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]);
    $this->assertNotSame($fingerprintOf($andNameThenAge), $fingerprintOf($orNameThenAge));

    // The number of children is part of the shape.
    $singleChild = Query::and([Query::equal('name', ['Alice'])]);
    $twoChildren = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]);
    $this->assertNotSame($fingerprintOf($singleChild), $fingerprintOf($twoChildren));

    // elemMatch on different fields must not collide even with equal child shapes.
    $matchTags = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['php'])]);
    $matchCategories = new Query(Query::TYPE_ELEM_MATCH, 'categories', [Query::equal('name', ['php'])]);
    $this->assertNotSame($fingerprintOf($matchTags), $fingerprintOf($matchCategories));

    // elemMatch on the same field with only a value change keeps the fingerprint.
    $matchTagsOtherValue = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['js'])]);
    $this->assertSame($fingerprintOf($matchTags), $fingerprintOf($matchTagsOtherValue));
}
541+
542+
public function testFingerprintRejectsInvalidElements(): void
{
    // An integer is neither a query string nor a Query instance.
    $notAQuery = 42;

    $this->expectException(QueryException::class);

    Query::fingerprint([$notAQuery]);
}
547+
548+
public function testShape(): void
{
    // Three levels of nesting, to exercise the iterative traversal.
    $nested = Query::and([
        Query::or([
            Query::equal('a', ['x']),
            Query::and([
                Query::equal('b', ['y']),
                Query::lessThan('c', 5),
            ]),
        ]),
        Query::greaterThan('d', 10),
    ]);

    // Each entry pairs the expected shape string with the query that must produce it.
    $cases = [
        // Leaf queries
        ['equal:name', Query::equal('name', ['Alice'])],
        ['greaterThan:age', Query::greaterThan('age', 18)],
        // Logical query with an empty attribute
        [
            'and:(equal:name|greaterThan:age)',
            Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]),
        ],
        // elemMatch keeps its attribute (the field being matched)
        [
            'elemMatch:tags(equal:name)',
            new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['php'])]),
        ],
        // Deeply nested: children are sorted at every level
        ['and:(greaterThan:d|or:(and:(equal:b|lessThan:c)|equal:a))', $nested],
    ];

    foreach ($cases as [$expected, $query]) {
        $this->assertSame($expected, $query->shape());
    }
}
471578
}

0 commit comments

Comments
 (0)