Skip to content

Commit 41704eb

Browse files
committed
Address Jake's review on PR #852
#1 Drop $enable flag on skipDuplicates() scope guard The $enable param made every non-skipDuplicates createDocuments call pay for a closure allocation + extra function call. Branch at the call site instead so the cost only applies when the flag is actually set. - Adapter::skipDuplicates(callable, bool) → skipDuplicates(callable) - Database::skipDuplicates(callable, bool) → skipDuplicates(callable) - Database::createDocuments, Mirror::createDocuments, Pool::delegate, Pool::withTransaction now branch inline. #2 Drop fetchExistingByIds helper, inline find() The helper's per-tenant grouping defended a hypothetical multi-tenant batching scenario that no caller exercises (relationships are intra- tenant, callers always batch per tenant). Existing patterns in the same file (refetchDocuments, relationship loading) just call find() directly. Match that idiom and drop ~70 lines. #4 Mirror: only capture inserted docs in skipDuplicates mode The captureOnNext accumulator paid the cost (closure + per-doc array push) on every createDocuments call, including the common non-skip path. Branch at the entry of Mirror::createDocuments so the capture only happens when skipDuplicates is set; the non-skip path passes through to source/destination unchanged. #5 Move getInsertKeyword/Suffix/PermissionsSuffix to getters cluster Were sitting next to createDocuments(); moved to the getSupport* cluster around line 1030 where other adapter-capability shims live. Not addressed: - #2 partial: the existing patterns (refetchDocuments etc.) don't handle tenant-per-document multi-tenant batches either, so this is consistent. - #3 (drop the pre-filter): rejected. createDocumentRelationships runs in the encoding loop BEFORE the adapter's INSERT IGNORE no-ops the parent, so dropping the pre-filter would deterministically duplicate child rows on every CSV re-import of a collection with relationships (not a race window — every call). The relationships test verifies this. Reverting would require reintroducing the deferred-relationships scaffolding we just removed, and the adapter still couldn't tell us which parents were actually inserted (SQL INSERT IGNORE has no per-row reporting). Pre-filter stays.
1 parent 89e4cf8 commit 41704eb

5 files changed

Lines changed: 134 additions & 156 deletions

File tree

src/Database/Adapter.php

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,13 @@ abstract class Adapter
3939
* Run a callback with skipDuplicates enabled.
4040
* Duplicate key errors during createDocuments() will be silently skipped
4141
* instead of thrown. Nestable — saves and restores previous state.
42-
* Pass $enable = false to run the callback without toggling the flag
43-
* (useful for conditional forwarding from wrappers like Pool/Mirror).
4442
*
4543
* @template T
4644
* @param callable(): T $callback
47-
* @param bool $enable
4845
* @return T
4946
*/
50-
public function skipDuplicates(callable $callback, bool $enable = true): mixed
47+
public function skipDuplicates(callable $callback): mixed
5148
{
52-
if (!$enable) {
53-
return $callback();
54-
}
55-
5649
$previous = $this->skipDuplicates;
5750
$this->skipDuplicates = true;
5851

src/Database/Adapter/Pool.php

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,12 @@ public function __construct(UtopiaPool $pool)
4343
public function delegate(string $method, array $args): mixed
4444
{
4545
if ($this->pinnedAdapter !== null) {
46-
return $this->pinnedAdapter->skipDuplicates(
47-
fn () => $this->pinnedAdapter->{$method}(...$args),
48-
$this->skipDuplicates
49-
);
46+
if ($this->skipDuplicates) {
47+
return $this->pinnedAdapter->skipDuplicates(
48+
fn () => $this->pinnedAdapter->{$method}(...$args)
49+
);
50+
}
51+
return $this->pinnedAdapter->{$method}(...$args);
5052
}
5153

5254
return $this->pool->use(function (Adapter $adapter) use ($method, $args) {
@@ -69,10 +71,12 @@ public function delegate(string $method, array $args): mixed
6971
$adapter->setMetadata($key, $value);
7072
}
7173

72-
return $adapter->skipDuplicates(
73-
fn () => $adapter->{$method}(...$args),
74-
$this->skipDuplicates
75-
);
74+
if ($this->skipDuplicates) {
75+
return $adapter->skipDuplicates(
76+
fn () => $adapter->{$method}(...$args)
77+
);
78+
}
79+
return $adapter->{$method}(...$args);
7680
});
7781
}
7882

@@ -152,10 +156,12 @@ public function withTransaction(callable $callback): mixed
152156

153157
$this->pinnedAdapter = $adapter;
154158
try {
155-
return $adapter->skipDuplicates(
156-
fn () => $adapter->withTransaction($callback),
157-
$this->skipDuplicates
158-
);
159+
if ($this->skipDuplicates) {
160+
return $adapter->skipDuplicates(
161+
fn () => $adapter->withTransaction($callback)
162+
);
163+
}
164+
return $adapter->withTransaction($callback);
159165
} finally {
160166
$this->pinnedAdapter = null;
161167
}

src/Database/Adapter/SQL.php

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,33 @@ public function getSupportForHostname(): bool
10291029
return true;
10301030
}
10311031

1032+
/**
1033+
* Returns the INSERT keyword, optionally with IGNORE for duplicate handling.
1034+
* Override in adapter subclasses for DB-specific syntax.
1035+
*/
1036+
protected function getInsertKeyword(): string
1037+
{
1038+
return $this->skipDuplicates ? 'INSERT IGNORE INTO' : 'INSERT INTO';
1039+
}
1040+
1041+
/**
1042+
* Returns a suffix appended after VALUES clause for duplicate handling.
1043+
* Override in adapter subclasses (e.g., Postgres uses ON CONFLICT DO NOTHING).
1044+
*/
1045+
protected function getInsertSuffix(string $table): string
1046+
{
1047+
return '';
1048+
}
1049+
1050+
/**
1051+
* Returns a suffix for the permissions INSERT statement when ignoring duplicates.
1052+
* Override in adapter subclasses for DB-specific syntax.
1053+
*/
1054+
protected function getInsertPermissionsSuffix(): string
1055+
{
1056+
return '';
1057+
}
1058+
10321059
/**
10331060
* Get current attribute count from collection document
10341061
*
@@ -2611,33 +2638,6 @@ public function createDocuments(Document $collection, array $documents): array
26112638
return $documents;
26122639
}
26132640

2614-
/**
2615-
* Returns the INSERT keyword, optionally with IGNORE for duplicate handling.
2616-
* Override in adapter subclasses for DB-specific syntax.
2617-
*/
2618-
protected function getInsertKeyword(): string
2619-
{
2620-
return $this->skipDuplicates ? 'INSERT IGNORE INTO' : 'INSERT INTO';
2621-
}
2622-
2623-
/**
2624-
* Returns a suffix appended after VALUES clause for duplicate handling.
2625-
* Override in adapter subclasses (e.g., Postgres uses ON CONFLICT DO NOTHING).
2626-
*/
2627-
protected function getInsertSuffix(string $table): string
2628-
{
2629-
return '';
2630-
}
2631-
2632-
/**
2633-
* Returns a suffix for the permissions INSERT statement when ignoring duplicates.
2634-
* Override in adapter subclasses for DB-specific syntax.
2635-
*/
2636-
protected function getInsertPermissionsSuffix(): string
2637-
{
2638-
return '';
2639-
}
2640-
26412641
/**
26422642
* @param Document $collection
26432643
* @param string $attribute

src/Database/Database.php

Lines changed: 46 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -844,12 +844,8 @@ public function skipRelationshipsExistCheck(callable $callback): mixed
844844
}
845845
}
846846

847-
public function skipDuplicates(callable $callback, bool $enable = true): mixed
847+
public function skipDuplicates(callable $callback): mixed
848848
{
849-
if (!$enable) {
850-
return $callback();
851-
}
852-
853849
$previous = $this->skipDuplicates;
854850
$this->skipDuplicates = true;
855851

@@ -871,72 +867,6 @@ private function tenantKey(Document $document): string
871867
: $document->getId();
872868
}
873869

874-
/**
875-
* Batch-fetch documents by ID from a collection, keyed by tenantKey().
876-
* Chunks by maxQueryValues and groups queries by tenant in tenant-per-document mode.
877-
*
878-
* @param Document $collection
879-
* @param array<Document> $documents Source documents (only IDs are read)
880-
* @param bool $idsOnly When true, uses Query::select(['$id']) for a lighter fetch
881-
* @return array<string, Document>
882-
*/
883-
private function fetchExistingByIds(Document $collection, array $documents, bool $idsOnly = false): array
884-
{
885-
$collectionId = $collection->getId();
886-
$tenantPerDocument = $this->adapter->getSharedTables() && $this->adapter->getTenantPerDocument();
887-
$result = [];
888-
889-
$buildQueries = function (array $chunk) use ($idsOnly) {
890-
$queries = [Query::equal('$id', $chunk)];
891-
if ($idsOnly) {
892-
$queries[] = Query::select(['$id']);
893-
}
894-
$queries[] = Query::limit(\count($chunk));
895-
return $queries;
896-
};
897-
898-
if ($tenantPerDocument) {
899-
$idsByTenant = [];
900-
foreach ($documents as $doc) {
901-
$id = $doc->getId();
902-
if ($id !== '') {
903-
$idsByTenant[$doc->getTenant()][] = $id;
904-
}
905-
}
906-
foreach ($idsByTenant as $tenant => $tenantIds) {
907-
$tenantIds = \array_values(\array_unique($tenantIds));
908-
foreach (\array_chunk($tenantIds, \max(1, $this->maxQueryValues)) as $idChunk) {
909-
$fetched = $this->authorization->skip(
910-
fn () => $this->withTenant(
911-
$tenant,
912-
fn () => $this->silent(fn () => $this->find($collectionId, $buildQueries($idChunk)))
913-
)
914-
);
915-
foreach ($fetched as $doc) {
916-
$result[$tenant . ':' . $doc->getId()] = $doc;
917-
}
918-
}
919-
}
920-
921-
return $result;
922-
}
923-
924-
$ids = \array_values(\array_unique(\array_filter(
925-
\array_map(fn (Document $doc) => $doc->getId(), $documents),
926-
fn ($v) => $v !== null && $v !== ''
927-
)));
928-
foreach (\array_chunk($ids, \max(1, $this->maxQueryValues)) as $idChunk) {
929-
$fetched = $this->authorization->skip(
930-
fn () => $this->silent(fn () => $this->find($collectionId, $buildQueries($idChunk)))
931-
);
932-
foreach ($fetched as $doc) {
933-
$result[$doc->getId()] = $doc;
934-
}
935-
}
936-
937-
return $result;
938-
}
939-
940870
/**
941871
* Trigger callback for events
942872
*
@@ -5765,12 +5695,31 @@ public function createDocuments(
57655695
}
57665696
$documents = $deduplicated;
57675697

5768-
$preExistingIds = $this->fetchExistingByIds($collection, $documents, idsOnly: true);
5769-
if (!empty($preExistingIds)) {
5770-
$documents = \array_values(\array_filter(
5771-
$documents,
5772-
fn (Document $doc) => !isset($preExistingIds[$this->tenantKey($doc)])
5698+
$docIds = \array_values(\array_unique(\array_filter(
5699+
\array_map(fn (Document $doc) => $doc->getId(), $documents),
5700+
fn ($id) => $id !== ''
5701+
)));
5702+
5703+
if (!empty($docIds)) {
5704+
$existing = $this->authorization->skip(fn () => $this->silent(
5705+
fn () => $this->find($collection->getId(), [
5706+
Query::equal('$id', $docIds),
5707+
Query::select(['$id']),
5708+
Query::limit(\count($docIds)),
5709+
])
57735710
));
5711+
5712+
$preExistingIds = [];
5713+
foreach ($existing as $doc) {
5714+
$preExistingIds[$this->tenantKey($doc)] = true;
5715+
}
5716+
5717+
if (!empty($preExistingIds)) {
5718+
$documents = \array_values(\array_filter(
5719+
$documents,
5720+
fn (Document $doc) => !isset($preExistingIds[$this->tenantKey($doc)])
5721+
));
5722+
}
57745723
}
57755724
}
57765725

@@ -5821,11 +5770,11 @@ public function createDocuments(
58215770
}
58225771

58235772
foreach (\array_chunk($documents, $batchSize) as $chunk) {
5773+
$insert = fn () => $this->withTransaction(fn () => $this->adapter->createDocuments($collection, $chunk));
58245774
// Set adapter flag before withTransaction so Mongo can opt out of a real txn.
5825-
$batch = $this->adapter->skipDuplicates(
5826-
fn () => $this->withTransaction(fn () => $this->adapter->createDocuments($collection, $chunk)),
5827-
$this->skipDuplicates
5828-
);
5775+
$batch = $this->skipDuplicates
5776+
? $this->adapter->skipDuplicates($insert)
5777+
: $insert();
58295778

58305779
$batch = $this->adapter->getSequences($collection->getId(), $batch);
58315780

@@ -7241,7 +7190,23 @@ public function upsertDocumentsWithIncrease(
72417190
$seenIds = [];
72427191

72437192
// Batch-fetch existing documents in one query instead of N individual getDocument() calls
7244-
$existingDocs = $this->fetchExistingByIds($collection, $documents);
7193+
$docIds = \array_values(\array_unique(\array_filter(
7194+
\array_map(fn (Document $doc) => $doc->getId(), $documents),
7195+
fn ($id) => $id !== ''
7196+
)));
7197+
7198+
$existingDocs = [];
7199+
if (!empty($docIds)) {
7200+
$existing = $this->authorization->skip(fn () => $this->silent(
7201+
fn () => $this->find($collection->getId(), [
7202+
Query::equal('$id', $docIds),
7203+
Query::limit(\count($docIds)),
7204+
])
7205+
));
7206+
foreach ($existing as $doc) {
7207+
$existingDocs[$this->tenantKey($doc)] = $doc;
7208+
}
7209+
}
72457210

72467211
foreach ($documents as $key => $document) {
72477212
$old = $existingDocs[$this->tenantKey($document)] ?? new Document();

0 commit comments

Comments
 (0)