Skip to content

Commit e0d07b4

Browse files
committed
remove foreign key constraint for buffer tables
1 parent c3fd6e8 commit e0d07b4

File tree

3 files changed

+13
-36
lines changed

3 files changed

+13
-36
lines changed

src/crawlee/storage_clients/_sql/_client_mixin.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,11 +282,15 @@ async def _drop(self) -> None:
282282
This operation is irreversible. Uses CASCADE deletion to remove all related items.
283283
"""
284284
stmt = delete(self._METADATA_TABLE).where(self._METADATA_TABLE.id == self._id)
285+
# Delete the buffer records with a separate query, since table don't link via foreign key.
286+
buffer_stmt = delete(self._BUFFER_TABLE).where(self._BUFFER_TABLE.storage_id == self._id)
287+
285288
async with self.get_session(with_simple_commit=True) as session:
286289
if self._storage_client.get_dialect_name() == 'sqlite':
287290
# foreign_keys=ON is set at the connection level. Required for cascade deletion.
288291
await session.execute(text('PRAGMA foreign_keys=ON'))
289292
await session.execute(stmt)
293+
await session.execute(buffer_stmt)
290294

291295
@overload
292296
async def _get_metadata(self, metadata_model: type[DatasetMetadata]) -> DatasetMetadata: ...

src/crawlee/storage_clients/_sql/_db_models.py

Lines changed: 8 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,6 @@ class DatasetMetadataDb(StorageMetadataDb, Base):
8787
back_populates='dataset', cascade='all, delete-orphan', lazy='noload'
8888
)
8989

90-
# Relationship to metadata buffer updates
91-
buffer: Mapped[list[DatasetMetadataBufferDb]] = relationship(
92-
back_populates='dataset', cascade='all, delete-orphan', lazy='noload'
93-
)
94-
9590
id = synonym('dataset_id')
9691
"""Alias for dataset_id to match Pydantic expectations."""
9792

@@ -132,11 +127,6 @@ class RequestQueueMetadataDb(StorageMetadataDb, Base):
132127
back_populates='queue', cascade='all, delete-orphan', lazy='noload'
133128
)
134129

135-
# Relationship to metadata buffer updates
136-
buffer: Mapped[list[RequestQueueMetadataBufferDb]] = relationship(
137-
back_populates='queue', cascade='all, delete-orphan', lazy='noload'
138-
)
139-
140130
id = synonym('request_queue_id')
141131
"""Alias for request_queue_id to match Pydantic expectations."""
142132

@@ -154,11 +144,6 @@ class KeyValueStoreMetadataDb(StorageMetadataDb, Base):
154144
back_populates='kvs', cascade='all, delete-orphan', lazy='noload'
155145
)
156146

157-
# Relationship to metadata buffer updates
158-
buffer: Mapped[list[KeyValueStoreMetadataBufferDb]] = relationship(
159-
back_populates='kvs', cascade='all, delete-orphan', lazy='noload'
160-
)
161-
162147
id = synonym('key_value_store_id')
163148
"""Alias for key_value_store_id to match Pydantic expectations."""
164149

@@ -317,14 +302,10 @@ class KeyValueStoreMetadataBufferDb(MetadataBufferDb, Base):
317302

318303
__tablename__ = 'key_value_store_metadata_buffer'
319304

320-
key_value_store_id: Mapped[str] = mapped_column(
321-
String(20), ForeignKey('key_value_stores.key_value_store_id', ondelete='CASCADE'), nullable=False, index=True
322-
)
305+
# Don't use foreign key constraint to avoid DB locks on high concurrency.
306+
key_value_store_id: Mapped[str] = mapped_column(String(20), nullable=False, index=True)
323307
"""ID of the key-value store being updated."""
324308

325-
# Relationship back to key-value store metadata
326-
kvs: Mapped[KeyValueStoreMetadataDb] = relationship(back_populates='buffer')
327-
328309
storage_id = synonym('key_value_store_id')
329310
"""Alias for key_value_store_id to match SqlClientMixin expectations."""
330311

@@ -334,18 +315,14 @@ class DatasetMetadataBufferDb(MetadataBufferDb, Base):
334315

335316
__tablename__ = 'dataset_metadata_buffer'
336317

337-
dataset_id: Mapped[str] = mapped_column(
338-
String(20), ForeignKey('datasets.dataset_id', ondelete='CASCADE'), nullable=False, index=True
339-
)
318+
# Don't use foreign key constraint to avoid DB locks on high concurrency.
319+
dataset_id: Mapped[str] = mapped_column(String(20), nullable=False, index=True)
340320
"""ID of the dataset being updated."""
341321

342-
# Counter deltas - use SUM when aggregating
322+
# Counter deltas - use SUM when aggregating.
343323
delta_item_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
344324
"""Delta for dataset item_count."""
345325

346-
# Relationship back to dataset metadata
347-
dataset: Mapped[DatasetMetadataDb] = relationship(back_populates='buffer')
348-
349326
storage_id = synonym('dataset_id')
350327
"""Alias for dataset_id to match SqlClientMixin expectations."""
351328

@@ -357,15 +334,14 @@ class RequestQueueMetadataBufferDb(MetadataBufferDb, Base):
357334

358335
__table_args__ = (Index('idx_rq_client', 'request_queue_id', 'client_id'),)
359336

360-
request_queue_id: Mapped[str] = mapped_column(
361-
String(20), ForeignKey('request_queues.request_queue_id', ondelete='CASCADE'), nullable=False, index=True
362-
)
337+
# Don't use foreign key constraint to avoid DB locks on high concurrency.
338+
request_queue_id: Mapped[str] = mapped_column(String(20), nullable=False, index=True)
363339
"""ID of the request queue being updated."""
364340

365341
client_id: Mapped[str] = mapped_column(String(32), nullable=False)
366342
"""Identifier of the client making this update."""
367343

368-
# Counter deltas - use SUM when aggregating
344+
# Counter deltas - use SUM when aggregating.
369345
delta_handled_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
370346
"""Delta for handled_request_count."""
371347

@@ -378,8 +354,5 @@ class RequestQueueMetadataBufferDb(MetadataBufferDb, Base):
378354
need_recalc: Mapped[bool | None] = mapped_column(Boolean, nullable=False, default=False)
379355
"""Flag indicating that counters need recalculation from actual data."""
380356

381-
# Relationship back to request queue metadata
382-
queue: Mapped[RequestQueueMetadataDb] = relationship(back_populates='buffer')
383-
384357
storage_id = synonym('request_queue_id')
385358
"""Alias for request_queue_id to match SqlClientMixin expectations."""

src/crawlee/storage_clients/_sql/_request_queue_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ async def fetch_next_request(self) -> Request | None:
444444

445445
update_stmt = (
446446
update(self._ITEM_TABLE)
447-
.where(self._ITEM_TABLE.request_id.in_(request_ids))
447+
.where(self._ITEM_TABLE.request_queue_id == self._id, self._ITEM_TABLE.request_id.in_(request_ids))
448448
.values(time_blocked_until=block_until, client_key=self.client_key)
449449
)
450450
await session.execute(update_stmt)

0 commit comments

Comments
 (0)