Skip to content

Commit c3926ad

Browse files
committed
enh: convenience kwarg since_time for search_dataset_via_api
1 parent 718e0e9 commit c3926ad

3 files changed

Lines changed: 46 additions & 1 deletion

File tree

CHANGELOG

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
- fix: '_condensed' suffix for file stem randomly missing in downloads
44
- enh: add `CKANAPI.hostname`
55
- enh: sort search results by creation date instead of score and modified date
6+
- enh: convenience kwarg `since_time` for `search_dataset_via_api`
67
- ref: replace `user_list` with `user_autocomplete`
78
- ref: replace `search_dataset` with `search_dataset_via_api`
89
in `APIInterrogator`; `search_dataset` is now only free text search.

dcoraid/dbmodel/db_api.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from itertools import islice
22
import sys
3+
import time
34
import urllib.parse
45

56
import numpy as np
@@ -137,6 +138,7 @@ def search_dataset_via_api(self,
137138
circles: list[str] = None,
138139
collections: list[str] = None,
139140
circle_collection_union: bool = False,
141+
since_time: float = None,
140142
sort_solr: str = "metadata_created desc",
141143
limit: int = 100):
142144
"""Search datasets via the CKAN API
@@ -146,7 +148,9 @@ def search_dataset_via_api(self,
146148
query: str
147149
search query
148150
filter_queries: list of str
149-
SOLR `fq` filter queries (are joined with 'AND')
151+
SOLR `fq` filter queries (are joined with 'AND'). The `circles`,
152+
`collections`, `circle_collection_union`, and `since_time`
153+
convenience kwargs are appended to the query list.
150154
circles: list of str
151155
list of circles (organizations) to search in
152156
collections: list of str
@@ -156,6 +160,9 @@ def search_dataset_via_api(self,
156160
sets. Otherwise (default), search only for datasets that
157161
are at least member of one of the circles and one of the
158162
collections.
163+
since_time: float
164+
Return only datasets that have been modified after this time,
165+
given in seconds since the epoch.
159166
sort_solr: str
160167
SOLR search ordering. By default, sort according to dataset
161168
creation date `'metadata_created desc'`. The CKAN default is
@@ -185,6 +192,7 @@ def search_dataset_via_api(self,
185192
else:
186193
solr_collections_query = None
187194

195+
# collections and/or circles filter query
188196
if solr_circle_query and solr_collections_query:
189197
if circle_collection_union:
190198
fq = f"({solr_circle_query} OR {solr_collections_query})"
@@ -198,6 +206,16 @@ def search_dataset_via_api(self,
198206
fq = ""
199207
if fq:
200208
filter_queries.append(fq)
209+
210+
# time filter query
211+
if since_time is not None:
212+
gm_time_str = time.strftime(r"%Y-%m-%dT%H\:%M\:%SZ",
213+
time.gmtime(since_time-60))
214+
# Use "metadata_modified", since datasets that were previously
215+
# drafts or private datasets made public would not show up
216+
# if "metadata_created" was used.
217+
filter_queries.append(f"metadata_modified:[{gm_time_str} TO NOW]")
218+
201219
if len(filter_queries) == 0:
202220
final_fq = ""
203221
elif len(filter_queries) == 1:

tests/test_dbmodel_api.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pathlib
22
import random
3+
import time
34

45
import pytest
56

@@ -174,6 +175,31 @@ def test_search_dataset_only_one_filter_query():
174175
assert False, "Search did not return figshare-7771184-v2!"
175176

176177

178+
def test_search_dataset_since_time():
179+
"""Search datasets that have been modified after some time"""
180+
api = common.get_api()
181+
db = db_api.APIInterrogator(api=api)
182+
tstart = time.time()
183+
184+
# Normally, this should not return anything, except for datasets that
185+
# other tests modified concurrently or within the preceding minute.
186+
de0 = db.search_dataset_via_api(since_time=tstart)
187+
188+
# Create a dataset
189+
ds_dict = common.make_dataset_for_download()
190+
191+
# Run the query again
192+
de1 = db.search_dataset_via_api(since_time=tstart)
193+
# The new dataset should be in the results.
194+
assert len(de1) > len(de0)
195+
196+
for item in de1:
197+
if item["id"] == ds_dict["id"]:
198+
break
199+
else:
200+
assert False, "created dataset not found"
201+
202+
177203
@pytest.mark.skipif(not HAS_FIGSHARE_ACCESS,
178204
reason="No access to figshare-import circle")
179205
def test_get_datasets_user_shared_figshare():

0 commit comments

Comments
 (0)