Skip to content

Commit ae929db

Browse files
committed
Restore per-tenant grouping for batch existing-doc lookups in tenantPerDocument
When inlining find() per Jake's review, the per-tenant grouping the deleted fetchExistingByIds helper used to do was lost. In tenantPerDocument mode with a multi-tenant batch, the inlined find() runs under the session tenant context (often null for platform workers) and silently misses rows belonging to other tenants — every input doc looks "new" and goes down the create path instead of the update path. This actively breaks appwrite's StatsUsage worker, which accumulates stats across many projects (each tagged with its own $tenant) and flushes them via $dbForLogs->setTenant(null)->setTenantPerDocument(true) followed by upsertDocumentsWithIncrease(...). With the old per-doc getDocument loop, each lookup ran under withTenant($doc->getTenant()) and was correct. The batch find() inlining lost that scoping. Fix per Greptile's suggestion: branch on tenantPerDocument mode and run one find() per unique tenant value under withTenant, merging results into the same lookup hash. K queries (K = unique tenants in the batch) instead of N (per-doc) or 1 (broken). Common single-tenant case still hits the fast batched path. Applied at both call sites: - Database::createDocuments skipDuplicates pre-filter - Database::upsertDocumentsWithIncrease existing-docs lookup Fixes greptile flag at PR #852 discussion r3077481595.
1 parent e9e5e76 commit ae929db

1 file changed

Lines changed: 87 additions & 37 deletions

File tree

src/Database/Database.php

Lines changed: 87 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5695,32 +5695,57 @@ public function createDocuments(
56955695
}
56965696
$documents = $deduplicated;
56975697

5698-
$docIds = \array_values(\array_unique(\array_filter(
5699-
\array_map(fn (Document $doc) => $doc->getId(), $documents),
5700-
fn ($id) => $id !== ''
5701-
)));
5702-
5703-
if (!empty($docIds)) {
5704-
$existing = $this->authorization->skip(fn () => $this->silent(
5705-
fn () => $this->find($collection->getId(), [
5706-
Query::equal('$id', $docIds),
5707-
Query::select(['$id']),
5708-
Query::limit(\count($docIds)),
5709-
])
5710-
));
5711-
5712-
$preExistingIds = [];
5713-
foreach ($existing as $doc) {
5714-
$preExistingIds[$this->tenantKey($doc)] = true;
5698+
$preExistingIds = [];
5699+
5700+
// tenantPerDocument: group ids by tenant and run one find() per tenant under
5701+
// withTenant, so cross-tenant batches don't get silently scoped to the session
5702+
// tenant and miss rows belonging to other tenants.
5703+
if ($this->getSharedTables() && $this->getTenantPerDocument()) {
5704+
$idsByTenant = [];
5705+
foreach ($documents as $doc) {
5706+
if ($doc->getId() !== '') {
5707+
$idsByTenant[$doc->getTenant()][] = $doc->getId();
5708+
}
57155709
}
5716-
5717-
if (!empty($preExistingIds)) {
5718-
$documents = \array_values(\array_filter(
5719-
$documents,
5720-
fn (Document $doc) => !isset($preExistingIds[$this->tenantKey($doc)])
5710+
foreach ($idsByTenant as $tenant => $tenantIds) {
5711+
$tenantIds = \array_values(\array_unique($tenantIds));
5712+
$found = $this->authorization->skip(fn () => $this->withTenant($tenant, fn () => $this->silent(
5713+
fn () => $this->find($collection->getId(), [
5714+
Query::equal('$id', $tenantIds),
5715+
Query::select(['$id']),
5716+
Query::limit(\count($tenantIds)),
5717+
])
5718+
)));
5719+
foreach ($found as $doc) {
5720+
$preExistingIds[$tenant . ':' . $doc->getId()] = true;
5721+
}
5722+
}
5723+
} else {
5724+
$docIds = \array_values(\array_unique(\array_filter(
5725+
\array_map(fn (Document $doc) => $doc->getId(), $documents),
5726+
fn ($id) => $id !== ''
5727+
)));
5728+
5729+
if (!empty($docIds)) {
5730+
$existing = $this->authorization->skip(fn () => $this->silent(
5731+
fn () => $this->find($collection->getId(), [
5732+
Query::equal('$id', $docIds),
5733+
Query::select(['$id']),
5734+
Query::limit(\count($docIds)),
5735+
])
57215736
));
5737+
foreach ($existing as $doc) {
5738+
$preExistingIds[$this->tenantKey($doc)] = true;
5739+
}
57225740
}
57235741
}
5742+
5743+
if (!empty($preExistingIds)) {
5744+
$documents = \array_values(\array_filter(
5745+
$documents,
5746+
fn (Document $doc) => !isset($preExistingIds[$this->tenantKey($doc)])
5747+
));
5748+
}
57245749
}
57255750

57265751
foreach ($documents as $document) {
@@ -7189,22 +7214,47 @@ public function upsertDocumentsWithIncrease(
71897214
$updated = 0;
71907215
$seenIds = [];
71917216

7192-
// Batch-fetch existing documents in one query instead of N individual getDocument() calls
7193-
$docIds = \array_values(\array_unique(\array_filter(
7194-
\array_map(fn (Document $doc) => $doc->getId(), $documents),
7195-
fn ($id) => $id !== ''
7196-
)));
7197-
7217+
// Batch-fetch existing documents in one query instead of N individual getDocument() calls.
7218+
// tenantPerDocument: group ids by tenant and run one find() per tenant under withTenant,
7219+
// so cross-tenant batches (e.g. StatsUsage worker) don't get silently scoped to the
7220+
// session tenant and miss rows belonging to other tenants.
71987221
$existingDocs = [];
7199-
if (!empty($docIds)) {
7200-
$existing = $this->authorization->skip(fn () => $this->silent(
7201-
fn () => $this->find($collection->getId(), [
7202-
Query::equal('$id', $docIds),
7203-
Query::limit(\count($docIds)),
7204-
])
7205-
));
7206-
foreach ($existing as $doc) {
7207-
$existingDocs[$this->tenantKey($doc)] = $doc;
7222+
7223+
if ($this->getSharedTables() && $this->getTenantPerDocument()) {
7224+
$idsByTenant = [];
7225+
foreach ($documents as $doc) {
7226+
if ($doc->getId() !== '') {
7227+
$idsByTenant[$doc->getTenant()][] = $doc->getId();
7228+
}
7229+
}
7230+
foreach ($idsByTenant as $tenant => $tenantIds) {
7231+
$tenantIds = \array_values(\array_unique($tenantIds));
7232+
$found = $this->authorization->skip(fn () => $this->withTenant($tenant, fn () => $this->silent(
7233+
fn () => $this->find($collection->getId(), [
7234+
Query::equal('$id', $tenantIds),
7235+
Query::limit(\count($tenantIds)),
7236+
])
7237+
)));
7238+
foreach ($found as $doc) {
7239+
$existingDocs[$tenant . ':' . $doc->getId()] = $doc;
7240+
}
7241+
}
7242+
} else {
7243+
$docIds = \array_values(\array_unique(\array_filter(
7244+
\array_map(fn (Document $doc) => $doc->getId(), $documents),
7245+
fn ($id) => $id !== ''
7246+
)));
7247+
7248+
if (!empty($docIds)) {
7249+
$existing = $this->authorization->skip(fn () => $this->silent(
7250+
fn () => $this->find($collection->getId(), [
7251+
Query::equal('$id', $docIds),
7252+
Query::limit(\count($docIds)),
7253+
])
7254+
));
7255+
foreach ($existing as $doc) {
7256+
$existingDocs[$this->tenantKey($doc)] = $doc;
7257+
}
72087258
}
72097259
}
72107260

0 commit comments

Comments
 (0)