@@ -30,12 +30,11 @@ def __init__(self, api):
3030 def get_circles (self ):
3131 """Return the list of DCOR Circles (one dict with all fields per circle)
3232 """
33- data = self .api .get ("organization_list" )
34- return data
33+ return self .api .get ("organization_list" , all_fields = True )
3534
3635 def get_collections (self ):
3736 """Return the list of DCOR Collections (one dict with all fields per collection)"""
38- data = self .api .get ("group_list" )
37+ data = self .api .get ("group_list" , all_fields = True , limit = 1000 )
3938 if len (data ) == 1000 :
4039 raise NotImplementedError (
4140 "Reached hard limit of 1000 results! "
@@ -72,14 +71,14 @@ def get_datasets_user_shared(self):
7271
7372 for circles_batch in batched (self .get_circles (), 20 ):
7473 dbe += self .search_dataset_via_api (
75- circles = list ( circles_batch ) ,
74+ circles = [ c [ "name" ] for c in circles_batch ] ,
7675 filter_queries = [f"-creator_user_id:{ self .api .user_id } " ],
7776 limit = 0 ,
7877 )
7978
8079 for collections_batch in batched (self .get_collections (), 20 ):
8180 dbe += self .search_dataset_via_api (
82- collections = list ( collections_batch ) ,
81+ collections = [ c [ "name" ] for c in collections_batch ] ,
8382 filter_queries = [f"-creator_user_id:{ self .api .user_id } " ],
8483 limit = 0 ,
8584 )
@@ -134,7 +133,9 @@ def search_dataset_via_api(self,
134133 since_time : float = None ,
135134 sort_solr : str = "metadata_created desc" ,
136135 start : int = 0 ,
137- limit : int = 100 ):
136+ limit : int = 100 ,
137+ ret_db_extract : bool = True ,
138+ ):
138139 """Search datasets via the CKAN API
139140
140141 Parameters
@@ -167,6 +168,10 @@ def search_dataset_via_api(self,
167168 returned datasets should begin.
168169 limit: int
169170 limit number of search results; Set to 0 to get all results
171+ ret_db_extract: bool
172+ whether to return an instance of :class:`DBExtract`; if set to
173+ `False`, a plain list of datasets is returned instead, which is
174+ faster.
170175 """
171176 if filter_queries is None :
172177 filter_queries = []
@@ -230,7 +235,10 @@ def search_dataset_via_api(self,
230235
231236 num_total = np .inf # just the initial value
232237 num_retrieved = 0
233- dbe = DBExtract ()
238+ if ret_db_extract :
239+ dbe = DBExtract ()
240+ else :
241+ dbe = []
234242 while start + num_retrieved < min (start + limit , num_total ) and rows :
235243 data = self .api .get (
236244 "package_search" ,
@@ -249,7 +257,7 @@ def search_dataset_via_api(self,
249257 # in the next iteration, only get the final
250258 # few results.
251259 rows = num_total - num_retrieved
252- dbe . add_datasets ( data ["results" ])
260+ dbe += data ["results" ]
253261
254262 return dbe
255263
0 commit comments