Skip to content

Commit eae231d

Browse files
committed
up docs
1 parent d4cd262 commit eae231d

File tree

3 files changed

+36
-6
lines changed

docs/guides/storage_clients.mdx

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -208,6 +208,15 @@ class dataset_records {
208208
+ data
209209
}
210210
211+
class dataset_metadata_buffer {
212+
<<table>>
213+
+ id (PK)
214+
+ accessed_at
215+
+ modified_at
216+
+ dataset_id (FK)
217+
+ delta_item_count
218+
}
219+
211220
%% ========================
212221
%% Key-Value Store Tables
213222
%% ========================
@@ -231,15 +240,25 @@ class key_value_store_records {
231240
+ size
232241
}
233242
243+
class key_value_store_metadata_buffer {
244+
<<table>>
245+
+ id (PK)
246+
+ accessed_at
247+
+ modified_at
248+
+ key_value_store_id (FK)
249+
}
250+
234251
%% ========================
235252
%% Client to Table arrows
236253
%% ========================
237254
238255
SqlDatasetClient --> datasets
239256
SqlDatasetClient --> dataset_records
257+
SqlDatasetClient --> dataset_metadata_buffer
240258
241259
SqlKeyValueStoreClient --> key_value_stores
242260
SqlKeyValueStoreClient --> key_value_store_records
261+
SqlKeyValueStoreClient --> key_value_store_metadata_buffer
243262
```
244263
```mermaid
245264
---
@@ -294,13 +313,27 @@ class request_queue_state {
294313
+ forefront_sequence_counter
295314
}
296315
316+
class request_queue_metadata_buffer {
317+
<<table>>
318+
+ id (PK)
319+
+ accessed_at
320+
+ modified_at
321+
+ request_queue_id (FK)
322+
+ client_id
323+
+ delta_handled_count
324+
+ delta_pending_count
325+
+ delta_total_count
326+
+ need_recalc
327+
}
328+
297329
%% ========================
298330
%% Client to Table arrows
299331
%% ========================
300332
301333
SqlRequestQueueClient --> request_queues
302334
SqlRequestQueueClient --> request_queue_records
303335
SqlRequestQueueClient --> request_queue_state
336+
SqlRequestQueueClient --> request_queue_metadata_buffer
304337
```
305338

306339
Configuration options for the <ApiLink to="class/SqlStorageClient">`SqlStorageClient`</ApiLink> can be set through environment variables or the <ApiLink to="class/Configuration">`Configuration`</ApiLink> class:

src/crawlee/storage_clients/_sql/_client_mixin.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -272,10 +272,8 @@ async def _purge(self, metadata_kwargs: MetadataUpdateParams) -> None:
272272
await self._process_buffers()
273273

274274
stmt_records = delete(self._ITEM_TABLE).where(self._ITEM_TABLE.storage_id == self._id)
275-
stmt_buffers = delete(self._BUFFER_TABLE).where(self._BUFFER_TABLE.storage_id == self._id)
276275
async with self.get_session(with_simple_commit=True) as session:
277276
await session.execute(stmt_records)
278-
await session.execute(stmt_buffers)
279277
await self._update_metadata(session, **metadata_kwargs)
280278

281279
async def _drop(self) -> None:

src/crawlee/storage_clients/_sql/_storage_client.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -273,10 +273,9 @@ def _get_or_create_engine(self, configuration: Configuration) -> AsyncEngine:
273273
self._engine = create_async_engine(
274274
connection_string,
275275
future=True,
276-
pool_size=5,
277-
max_overflow=10,
278-
pool_timeout=30,
279-
pool_recycle=600,
276+
pool_size=10,
277+
max_overflow=50,
278+
pool_timeout=60,
280279
pool_pre_ping=True,
281280
echo=False,
282281
connect_args={'timeout': 30},

0 commit comments

Comments
 (0)