-
Notifications
You must be signed in to change notification settings - Fork 55
Add ignore param to createDocuments for silent duplicate handling #850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
721b9ed
b62e93e
36b048f
eb564c3
59aab83
e85729d
8cec7c7
ba749b9
1c40fab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1365,6 +1365,35 @@ public function updateDocument(Document $collection, string $id, Document $docum | |
| return $document; | ||
| } | ||
|
|
||
| protected function getInsertKeyword(bool $ignore): string | ||
| { | ||
| return 'INSERT INTO'; | ||
| } | ||
|
|
||
| protected function getInsertSuffix(bool $ignore, string $table): string | ||
| { | ||
| if (!$ignore) { | ||
| return ''; | ||
| } | ||
|
|
||
| $conflictTarget = $this->sharedTables ? '("_uid", "_tenant")' : '("_uid")'; | ||
|
|
||
| return "ON CONFLICT {$conflictTarget} DO NOTHING"; | ||
| } | ||
|
|
||
| protected function getInsertPermissionsSuffix(bool $ignore): string | ||
| { | ||
| if (!$ignore) { | ||
| return ''; | ||
| } | ||
|
|
||
| $conflictTarget = $this->sharedTables | ||
| ? '("_type", "_permission", "_document", "_tenant")' | ||
| : '("_type", "_permission", "_document")'; | ||
|
|
||
| return "ON CONFLICT {$conflictTarget} DO NOTHING"; | ||
| } | ||
|
Comment on lines
+1373
to
+1395
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The unique indexes created in
This means every
The collation must be specified in the inference list to match the actual index. For example: // getInsertSuffix — non-shared
$conflictTarget = '("_uid" COLLATE utf8_ci_ai)';
// getInsertSuffix — shared
$conflictTarget = '("_uid" COLLATE utf8_ci_ai, "_tenant")';
// getInsertPermissionsSuffix — non-shared
$conflictTarget = '("_document" COLLATE utf8_ci_ai, "_type", "_permission")';
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not an issue. The existing |
||
|
|
||
| /** | ||
| * @param string $tableName | ||
| * @param string $columns | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2471,7 +2471,7 @@ protected function execute(mixed $stmt): bool | |
| * @throws DuplicateException | ||
| * @throws \Throwable | ||
| */ | ||
| public function createDocuments(Document $collection, array $documents): array | ||
| public function createDocuments(Document $collection, array $documents, bool $ignore = false): array | ||
| { | ||
| if (empty($documents)) { | ||
| return $documents; | ||
|
|
@@ -2573,8 +2573,9 @@ public function createDocuments(Document $collection, array $documents): array | |
| $batchKeys = \implode(', ', $batchKeys); | ||
|
|
||
| $stmt = $this->getPDO()->prepare(" | ||
| INSERT INTO {$this->getSQLTable($name)} {$columns} | ||
| {$this->getInsertKeyword($ignore)} {$this->getSQLTable($name)} {$columns} | ||
| VALUES {$batchKeys} | ||
| {$this->getInsertSuffix($ignore, $name)} | ||
| "); | ||
|
|
||
| foreach ($bindValues as $key => $value) { | ||
|
|
@@ -2588,8 +2589,9 @@ public function createDocuments(Document $collection, array $documents): array | |
| $permissions = \implode(', ', $permissions); | ||
|
|
||
| $sqlPermissions = " | ||
| INSERT INTO {$this->getSQLTable($name . '_perms')} (_type, _permission, _document {$tenantColumn}) | ||
| VALUES {$permissions}; | ||
| {$this->getInsertKeyword($ignore)} {$this->getSQLTable($name . '_perms')} (_type, _permission, _document {$tenantColumn}) | ||
| VALUES {$permissions} | ||
| {$this->getInsertPermissionsSuffix($ignore)} | ||
| "; | ||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| $stmtPermissions = $this->getPDO()->prepare($sqlPermissions); | ||
|
|
@@ -2608,6 +2610,33 @@ public function createDocuments(Document $collection, array $documents): array | |
| return $documents; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the INSERT keyword, optionally with IGNORE for duplicate handling. | ||
| * Override in adapter subclasses for DB-specific syntax. | ||
| */ | ||
| protected function getInsertKeyword(bool $ignore): string | ||
| { | ||
| return $ignore ? 'INSERT IGNORE INTO' : 'INSERT INTO'; | ||
|
Comment on lines
+2617
to
+2619
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
MySQL/MariaDB's For strict duplicate-only suppression on MariaDB/MySQL, consider catching the specific PDO error code
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Documents are validated by |
||
| } | ||
|
|
||
| /** | ||
| * Returns a suffix appended after VALUES clause for duplicate handling. | ||
| * Override in adapter subclasses (e.g., Postgres uses ON CONFLICT DO NOTHING). | ||
| */ | ||
| protected function getInsertSuffix(bool $ignore, string $table): string | ||
| { | ||
| return ''; | ||
| } | ||
|
|
||
| /** | ||
| * Returns a suffix for the permissions INSERT statement when ignoring duplicates. | ||
| * Override in adapter subclasses for DB-specific syntax. | ||
| */ | ||
| protected function getInsertPermissionsSuffix(bool $ignore): string | ||
| { | ||
| return ''; | ||
| } | ||
|
|
||
| /** | ||
| * @param Document $collection | ||
| * @param string $attribute | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
onNextnever fires for successfully-inserted documents in a mixed-duplicate batchWith
ordered: false, MongoDB processes every document in the batch and only raisesBulkWriteExceptionafter completing all writes — meaning some documents may have been genuinely persisted before the exception. When the catch block returns[],Database.phpiterates that empty array and callsonNextzero times, silently swallowing results for every document actually written in that batch.Callers that use
onNextto stream processed results (cache warming, post-insert hooks, progress tracking) will miss every successfully inserted document from any batch that contained at least one duplicate — which is the common case forignore: truebulk-loads.A more complete fix would recover succeeded writes from the exception itself via
$e->getWriteResult()->getInsertedIds()to re-fetch and return only the actually-inserted documents, rather than returning[]unconditionally.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Known limitation. This only triggers on a race condition — pre-fetch already filters known duplicates before the adapter. The
utopia-php/mongoclient does not exposeBulkWriteException::getWriteResult(), so we cannot recover which docs succeeded in a mixed batch. For the migration use case (single writer), this race cannot occur. Documented as accepted behavior.