-
Notifications
You must be signed in to change notification settings - Fork 55
Add skipDuplicates() scope guard to createDocuments #855
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -417,6 +417,8 @@ class Database | |
|
|
||
| protected bool $preserveDates = false; | ||
|
|
||
| protected bool $skipDuplicates = false; | ||
|
|
||
| protected bool $preserveSequence = false; | ||
|
|
||
| protected int $maxQueryValues = 5000; | ||
|
|
@@ -842,6 +844,29 @@ public function skipRelationshipsExistCheck(callable $callback): mixed | |
| } | ||
| } | ||
|
|
||
| public function skipDuplicates(callable $callback): mixed | ||
| { | ||
| $previous = $this->skipDuplicates; | ||
| $this->skipDuplicates = true; | ||
|
|
||
| try { | ||
| return $callback(); | ||
| } finally { | ||
| $this->skipDuplicates = $previous; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Build a tenant-aware identity key for a document. | ||
| * Returns "<tenant>:<id>" in tenant-per-document shared-table mode, otherwise just the id. | ||
| */ | ||
| private function tenantKey(Document $document): string | ||
| { | ||
| return ($this->adapter->getSharedTables() && $this->adapter->getTenantPerDocument()) | ||
| ? $document->getTenant() . ':' . $document->getId() | ||
| : $document->getId(); | ||
| } | ||
|
|
||
| /** | ||
| * Trigger callback for events | ||
| * | ||
|
|
@@ -5700,9 +5725,11 @@ public function createDocuments( | |
| } | ||
|
|
||
| foreach (\array_chunk($documents, $batchSize) as $chunk) { | ||
| $batch = $this->withTransaction(function () use ($collection, $chunk) { | ||
| return $this->adapter->createDocuments($collection, $chunk); | ||
| }); | ||
| $insert = fn () => $this->withTransaction(fn () => $this->adapter->createDocuments($collection, $chunk)); | ||
| // Set adapter flag before withTransaction so Mongo can opt out of a real txn. | ||
| $batch = $this->skipDuplicates | ||
| ? $this->adapter->skipDuplicates($insert) | ||
| : $insert(); | ||
|
Comment on lines
+5728
to
+5732
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Line 5728 only guards the adapter insert. By then, 🤖 Prompt for AI Agents |
||
|
|
||
| $batch = $this->adapter->getSequences($collection->getId(), $batch); | ||
|
|
||
|
|
@@ -7116,18 +7143,53 @@ public function upsertDocumentsWithIncrease( | |
| $created = 0; | ||
| $updated = 0; | ||
| $seenIds = []; | ||
| foreach ($documents as $key => $document) { | ||
| if ($this->getSharedTables() && $this->getTenantPerDocument()) { | ||
| $old = $this->authorization->skip(fn () => $this->withTenant($document->getTenant(), fn () => $this->silent(fn () => $this->getDocument( | ||
| $collection->getId(), | ||
| $document->getId(), | ||
| )))); | ||
| } else { | ||
| $old = $this->authorization->skip(fn () => $this->silent(fn () => $this->getDocument( | ||
| $collection->getId(), | ||
| $document->getId(), | ||
|
|
||
| // Batch-fetch existing documents in one query instead of N individual getDocument() calls. | ||
| // tenantPerDocument: group ids by tenant and run one find() per tenant under withTenant, | ||
| // so cross-tenant batches (e.g. StatsUsage worker) don't get silently scoped to the | ||
| // session tenant and miss rows belonging to other tenants. | ||
| $existingDocs = []; | ||
|
|
||
| if ($this->getSharedTables() && $this->getTenantPerDocument()) { | ||
| $idsByTenant = []; | ||
| foreach ($documents as $doc) { | ||
| if ($doc->getId() !== '') { | ||
| $idsByTenant[$doc->getTenant()][] = $doc->getId(); | ||
| } | ||
| } | ||
| foreach ($idsByTenant as $tenant => $tenantIds) { | ||
| $tenantIds = \array_values(\array_unique($tenantIds)); | ||
| $found = $this->authorization->skip(fn () => $this->withTenant($tenant, fn () => $this->silent( | ||
| fn () => $this->find($collection->getId(), [ | ||
| Query::equal('$id', $tenantIds), | ||
| Query::limit(\count($tenantIds)), | ||
| ]) | ||
| ))); | ||
| foreach ($found as $doc) { | ||
| $existingDocs[$tenant . ':' . $doc->getId()] = $doc; | ||
| } | ||
| } | ||
| } else { | ||
| $docIds = \array_values(\array_unique(\array_filter( | ||
| \array_map(fn (Document $doc) => $doc->getId(), $documents), | ||
| fn ($id) => $id !== '' | ||
| ))); | ||
|
|
||
| if (!empty($docIds)) { | ||
| $existing = $this->authorization->skip(fn () => $this->silent( | ||
| fn () => $this->find($collection->getId(), [ | ||
| Query::equal('$id', $docIds), | ||
| Query::limit(\count($docIds)), | ||
| ]) | ||
| )); | ||
| foreach ($existing as $doc) { | ||
| $existingDocs[$this->tenantKey($doc)] = $doc; | ||
| } | ||
| } | ||
|
Comment on lines
+7153
to
7188
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Chunk these pre-read Both branches build a single 🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| foreach ($documents as $key => $document) { | ||
| $old = $existingDocs[$this->tenantKey($document)] ?? new Document(); | ||
|
|
||
| // Extract operators early to avoid comparison issues | ||
| $documentArray = $document->getArrayCopy(); | ||
|
|
@@ -7294,7 +7356,7 @@ public function upsertDocumentsWithIncrease( | |
| $document = $this->silent(fn () => $this->createDocumentRelationships($collection, $document)); | ||
| } | ||
|
|
||
| $seenIds[] = $document->getId(); | ||
| $seenIds[] = $this->tenantKey($document); | ||
| $old = $this->adapter->castingBefore($collection, $old); | ||
| $document = $this->adapter->castingBefore($collection, $document); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🌐 Web query:
Does SQLiteINSERT OR IGNOREsuppress NOT NULL and CHECK violations in addition to UNIQUE/PRIMARY KEY conflicts, and since which SQLite version isINSERT ... ON CONFLICT(column-list) DO NOTHINGsupported?💡 Result:
Yes, SQLite INSERT OR IGNORE suppresses NOT NULL violations (skips the row silently) in addition to UNIQUE/PRIMARY KEY conflicts, but it does NOT suppress CHECK violations (those cause ABORT like default). INSERT ... ON CONFLICT(column-list) DO NOTHING (the UPSERT syntax specifying columns) has been supported since SQLite version 3.24.0 (2018-06-04).
Citations:
INSERT OR IGNOREsuppresses more than just duplicate constraints—fix to target only_uidconflicts.SQLite's
INSERT OR IGNOREsilently suppresses NOT NULL and UNIQUE/PRIMARY KEY violations alike. This means rows failing unrelated constraints (e.g., NOT NULL on a required field) are dropped without error, violating the design intent that only duplicate_uidconflicts should be silently skipped.Use
INSERT ... ON CONFLICT(_uid) DO NOTHINGinstead to suppress only the intended unique keys. This syntax is supported in SQLite 3.24.0+ and applies the constraint exception only to the specified columns.Suggested direction
protected function getInsertKeyword(): string { - return $this->skipDuplicates ? 'INSERT OR IGNORE INTO' : 'INSERT INTO'; + return 'INSERT INTO'; +} + +protected function getInsertSuffix(string $table): string +{ + if (!$this->skipDuplicates) { + return ''; + } + + $conflictTarget = $this->sharedTables ? '(`_tenant`, `_uid`)' : '(`_uid`)'; + + return "ON CONFLICT {$conflictTarget} DO NOTHING"; +} + +protected function getInsertPermissionsSuffix(): string +{ + if (!$this->skipDuplicates) { + return ''; + } + + $conflictTarget = $this->sharedTables + ? '(`_tenant`, `_document`, `_type`, `_permission`)' + : '(`_document`, `_type`, `_permission`)'; + + return "ON CONFLICT {$conflictTarget} DO NOTHING"; }🤖 Prompt for AI Agents