|
| 1 | +from itertools import islice |
| 2 | +import sys |
1 | 3 | import urllib.parse |
2 | 4 |
|
3 | 5 | import numpy as np |
|
6 | 8 | from .extract import DBExtract |
7 | 9 |
|
8 | 10 |
|
if sys.version_info >= (3, 12):
    from itertools import batched
else:
    def batched(iterable, chunk_size):
        """Split `iterable` into tuples of at most `chunk_size` items.

        Backport of :func:`itertools.batched` for Python < 3.12.

        Parameters
        ----------
        iterable: iterable
            Any iterable; it is consumed lazily.
        chunk_size: int
            Maximum number of items per batch; must be at least one.

        Returns
        -------
        batches: iterator of tuple
            Consecutive batches in input order. The last batch may be
            shorter than `chunk_size`. Nothing is yielded for an empty
            input.

        Raises
        ------
        ValueError
            If `chunk_size` is smaller than one.
        """
        # Validate eagerly (not on first iteration) to mirror the
        # standard library implementation. Without this check, a
        # chunk size of zero would silently produce no batches at all.
        if chunk_size < 1:
            raise ValueError("n must be at least one")
        iterator = iter(iterable)

        def _iter_batches():
            # An empty tuple means the iterator is exhausted.
            while chunk := tuple(islice(iterator, chunk_size)):
                yield chunk

        return _iter_batches()
| 18 | + |
| 19 | + |
9 | 20 | class APIInterrogator(DBInterrogator): |
10 | 21 | def __init__(self, api): |
11 | 22 | self.api = api.copy() |
@@ -61,14 +72,24 @@ def get_datasets_user_owned(self): |
61 | 72 | def get_datasets_user_shared(self): |
62 | 73 | """Return datasets shared with the user""" |
63 | 74 | assert self.mode == "user" |
64 | | - # perform a dataset search with all circles and collections |
65 | | - dbextract = self.search_dataset( |
66 | | - circles=self.get_circles(), |
67 | | - collections=self.get_collections(), |
68 | | - circle_collection_union=True, |
69 | | - filter_queries=[f"-creator_user_id:{self.api.user_id}"], |
70 | | - limit=0, |
71 | | - ) |
| 75 | + # Perform a dataset search with all circles and collections. |
| 76 | + # This search may become too large (414 Request-URI Too Large). |
| 77 | + # Limit the search to 20 circles/collections. |
| 78 | + dbextract = DBExtract() |
| 79 | + |
| 80 | + for circles_batch in batched(self.get_circles(), 20): |
| 81 | + dbextract += self.search_dataset( |
| 82 | + circles=list(circles_batch), |
| 83 | + filter_queries=[f"-creator_user_id:{self.api.user_id}"], |
| 84 | + limit=0, |
| 85 | + ) |
| 86 | + |
| 87 | + for collections_batch in batched(self.get_collections(), 20): |
| 88 | + dbextract += self.search_dataset( |
| 89 | + collections=list(collections_batch), |
| 90 | + filter_queries=[f"-creator_user_id:{self.api.user_id}"], |
| 91 | + limit=0, |
| 92 | + ) |
72 | 93 |
|
73 | 94 | # all packages the user is a collaborator in |
74 | 95 | collaborated = self.api.get("package_collaborator_list_for_user", |
@@ -114,7 +135,7 @@ def search_dataset(self, query="*:*", filter_queries=None, circles=None, |
114 | 135 | circle_collection_union: bool |
115 | 136 | If set to True, make a union of the circle and collection |
116 | 137 | sets. Otherwise (default), search only for datasets that |
117 | | - are are at least member of one of the circles and one of the |
| 138 | + are at least member of one of the circles and one of the |
118 | 139 | collections. |
119 | 140 | limit: int |
120 | 141 | limit number of search results; Set to 0 to get all results |
|
0 commit comments