Skip to content

Commit bd7694f

Browse files
committed
enh: get_collections and get_circles now return dicts, other minor improvements
1 parent 2c939c9 commit bd7694f

7 files changed

Lines changed: 51 additions & 28 deletions

File tree

CHANGELOG

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919
- enh: remove unused "Sharing" page
2020
- enh: add icon for "Find Data"
2121
- enh: cache CKANAPI instance in GUI
22+
- enh: `APIInterrogator.search_dataset_via_api` optionally returns list
23+
- enh: `DBExtract.__iadd__` supports lists
2224
- ref: remove `mode` from `DBInterrogator`
2325
- ref: replace `user_list` with `user_autocomplete`
2426
- ref: replace `search_dataset` with `search_dataset_via_api`
2527
in `APIInterrogator`; `search_dataset` is now only free text search
28+
- ref: `get_collections` and `get_circles` now return list of dictionaries
2629
- ref: rename gui submodules for better overview
2730
- ref: move "My Data" panel into its own submodule
2831
- ref: implement `is_dc_resource_dict`

dcoraid/dbmodel/db_api.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,11 @@ def __init__(self, api):
3030
def get_circles(self):
3131
"""Return the list of DCOR Circle dictionaries
3232
"""
33-
data = self.api.get("organization_list")
34-
return data
33+
return self.api.get("organization_list", all_fields=True)
3534

3635
def get_collections(self):
3736
"""Return the list of DCOR Collection dictionaries"""
38-
data = self.api.get("group_list")
37+
data = self.api.get("group_list", all_fields=True, limit=1000)
3938
if len(data) == 1000:
4039
raise NotImplementedError(
4140
"Reached hard limit of 1000 results! "
@@ -72,14 +71,14 @@ def get_datasets_user_shared(self):
7271

7372
for circles_batch in batched(self.get_circles(), 20):
7473
dbe += self.search_dataset_via_api(
75-
circles=list(circles_batch),
74+
circles=[c["name"] for c in circles_batch],
7675
filter_queries=[f"-creator_user_id:{self.api.user_id}"],
7776
limit=0,
7877
)
7978

8079
for collections_batch in batched(self.get_collections(), 20):
8180
dbe += self.search_dataset_via_api(
82-
collections=list(collections_batch),
81+
collections=[c["name"] for c in collections_batch],
8382
filter_queries=[f"-creator_user_id:{self.api.user_id}"],
8483
limit=0,
8584
)
@@ -134,7 +133,9 @@ def search_dataset_via_api(self,
134133
since_time: float = None,
135134
sort_solr: str = "metadata_created desc",
136135
start: int = 0,
137-
limit: int = 100):
136+
limit: int = 100,
137+
ret_db_extract: bool = True,
138+
):
138139
"""Search datasets via the CKAN API
139140
140141
Parameters
@@ -167,6 +168,10 @@ def search_dataset_via_api(self,
167168
returned datasets should begin.
168169
limit: int
169170
limit number of search results; Set to 0 to get all results
171+
ret_db_extract: bool
172+
whether to return an instance of :class:`DBExtract`; if set to
173+
`False`, then a list of datasets is returned instead, which is
174+
faster.
170175
"""
171176
if filter_queries is None:
172177
filter_queries = []
@@ -230,7 +235,10 @@ def search_dataset_via_api(self,
230235

231236
num_total = np.inf # just the initial value
232237
num_retrieved = 0
233-
dbe = DBExtract()
238+
if ret_db_extract:
239+
dbe = DBExtract()
240+
else:
241+
dbe = []
234242
while start + num_retrieved < min(start + limit, num_total) and rows:
235243
data = self.api.get(
236244
"package_search",
@@ -249,7 +257,7 @@ def search_dataset_via_api(self,
249257
# in the next iteration, only get the final
250258
# few results.
251259
rows = num_total - num_retrieved
252-
dbe.add_datasets(data["results"])
260+
dbe += data["results"]
253261

254262
return dbe
255263

dcoraid/dbmodel/db_api_cached.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,19 @@ def update(self, reset=False, abort_event=None):
142142

143143
new_timestamp = time.time()
144144

145-
dbe = self.ai.search_dataset_via_api(
146-
since_time=self.local_timestamp,
147-
limit=0,
148-
)
149-
150-
for ds_dict in dbe:
145+
for cdict in self.get_circles():
146+
logger.info(f"Fetching dataset from circle {cdict['name']}")
147+
dbe = self.ai.search_dataset_via_api(
148+
since_time=self.local_timestamp,
149+
circles=[cdict["name"]],
150+
limit=0,
151+
ret_db_extract=False,
152+
)
151153
if abort_event and abort_event.is_set():
152154
break
153-
self._mc.upsert_dataset(ds_dict)
155+
self._mc.upsert_many(dbe, org_id=cdict["id"])
156+
logger.info(
157+
f"Loaded {len(dbe)} datasets from circle {cdict['name']}")
154158
else:
155159
# Only update the local timestamp if we actually did
156160
# update the local database.

dcoraid/dbmodel/db_core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ def close(self):
2828

2929
@abc.abstractmethod
3030
def get_circles(self):
31-
"""Return the list of DCOR Circles"""
31+
"""Return the list of DCOR Circle dictionaries"""
3232
pass
3333

3434
@abc.abstractmethod
3535
def get_collections(self):
36-
"""Return the list of DCOR Collections"""
36+
"""Return the list of DCOR Collection dictionaries"""
3737
pass
3838

3939
def get_datasets_user(self):

dcoraid/gui/panel_my_data/widget_my_data.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,21 @@ def __init__(self, *args, **kwargs):
3838
self.checkBox_user_shared.clicked.connect(self.on_update_view)
3939

4040
@QtCore.pyqtSlot(dict, list)
41-
def on_added_datasets_to_collection(self, collection, dataset_ids):
41+
def on_added_datasets_to_collection(self,
42+
collection: dict,
43+
dataset_ids: list[str],
44+
):
4245
"""User manually added a bunch of datasets to a collection"""
4346
# Get the collection
4447
for col in self.database.get_collections():
45-
if col == collection["name"]:
48+
if col["id"] == collection["id"]:
4649
cid = collection["id"]
4750
break
4851
else:
4952
# we have to reset the database and try again
5053
self.database.update(reset=True)
5154
for col in self.database.get_collections():
52-
if col == collection["name"]:
55+
if col["id"] == collection["id"]:
5356
cid = collection["id"]
5457
break
5558
else:

tests/test_dbmodel_api.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
def test_get_circles():
2020
api = common.get_api()
2121
db = db_api.APIInterrogator(api=api)
22-
circles = db.get_circles()
22+
circles = [c["name"] for c in db.get_circles()]
2323
defaults = common.get_test_defaults()
2424
assert defaults["circle"] in circles
2525
# requires that the "dcoraid" user is in the figshare-import circle
@@ -31,7 +31,7 @@ def test_get_circles():
3131
def test_get_collections():
3232
api = common.get_api()
3333
db = db_api.APIInterrogator(api=api)
34-
collections = db.get_collections()
34+
collections = [c["name"] for c in db.get_collections()]
3535
defaults = common.get_test_defaults()
3636
assert defaults["collection"] in collections
3737
# requires that the "dcoraid" user is in figshare-collection collection
@@ -57,8 +57,10 @@ def test_public_api_interrogator():
5757
api = common.get_api()
5858
db = db_api.APIInterrogator(api=api)
5959
defaults = common.get_test_defaults()
60-
assert defaults["circle"] in db.get_circles()
61-
assert defaults["collection"] in db.get_collections()
60+
circles = [c["name"] for c in db.get_circles()]
61+
collections = [c["name"] for c in db.get_collections()]
62+
assert defaults["circle"] in circles
63+
assert defaults["collection"] in collections
6264
assert defaults["user"] in db.get_users()
6365

6466

tests/test_dbmodel_api_cache.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
def test_get_circles(tmp_path):
1818
api = common.get_api()
1919
db = CachedAPIInterrogator(cache_location=tmp_path, api=api)
20-
circles = db.get_circles()
20+
circles_dicts = db.get_circles()
21+
circles = [c["name"] for c in circles_dicts]
2122
defaults = common.get_test_defaults()
2223
assert defaults["circle"] in circles
2324
# requires that the "dcoraid" user is in the figshare-import circle
@@ -29,7 +30,7 @@ def test_get_circles(tmp_path):
2930
def test_get_collections(tmp_path):
3031
api = common.get_api()
3132
db = CachedAPIInterrogator(cache_location=tmp_path, api=api)
32-
collections = db.get_collections()
33+
collections = [c["name"] for c in db.get_collections()]
3334
defaults = common.get_test_defaults()
3435
assert defaults["collection"] in collections
3536
# requires that the "dcoraid" user is in figshare-collection collection
@@ -55,9 +56,11 @@ def test_get_users_anonymous(tmp_path):
5556
def test_public_api_interrogator(tmp_path):
5657
api = common.get_api()
5758
db = CachedAPIInterrogator(cache_location=tmp_path, api=api)
59+
circles = [c["name"] for c in db.get_circles()]
5860
defaults = common.get_test_defaults()
59-
assert defaults["circle"] in db.get_circles()
60-
assert defaults["collection"] in db.get_collections()
61+
collections = [c["name"] for c in db.get_collections()]
62+
assert defaults["circle"] in circles
63+
assert defaults["collection"] in collections
6164
assert defaults["user"] in db.get_users()
6265

6366

0 commit comments

Comments
 (0)