Skip to content

Commit cffb7d7

Browse files
committed
Expose full search functionality of CKAN and fix warnings
1 parent c32705a commit cffb7d7

13 files changed

Lines changed: 261 additions & 218 deletions

File tree

.idea/workspace.xml

Lines changed: 199 additions & 178 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,11 @@ You can read an existing HDX object with the static `read_from_hdx` method whi
219219

220220
You can search for datasets and resources in HDX using the `search_in_hdx` method which takes a configuration and a query parameter and returns a list of objects of the appropriate HDX object type eg. `list[Dataset]` eg.
221221

222-
datasets = Dataset.search_in_hdx(configuration, 'QUERY')
222+
datasets = Dataset.search_in_hdx(configuration, 'QUERY', **kwargs)
223223

224-
The query parameter takes a different format depending upon whether it is for a [dataset](http://lucene.apache.org/core/3_6_0/queryparsersyntax.html) or a [resource](http://docs.ckan.org/en/ckan-2.3.4/api/index.html#ckan.logic.action.get.resource_search).
224+
The query parameter takes a different format depending upon whether it is for a [dataset](http://lucene.apache.org/core/3_6_0/queryparsersyntax.html) or a [resource](http://docs.ckan.org/en/ckan-2.3.4/api/index.html#ckan.logic.action.get.resource_search).
225+
226+
Various additional arguments (`**kwargs`) can be supplied. These are detailed in the API documentation.
225227

226228
You can create an HDX Object, such as a dataset, resource or gallery item by calling the constructor with a configuration, which is required, and an optional dictionary containing metadata. For example:
227229

hdx/data/dataset.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def init_resources(self) -> None:
8989
None
9090
"""
9191
self.resources = list()
92+
""":type : List[Resource]"""
9293

9394
def add_update_resource(self, resource: Any) -> None:
9495
"""Add new or update existing resource in dataset with new metadata
@@ -99,14 +100,13 @@ def add_update_resource(self, resource: Any) -> None:
99100
Returns:
100101
None
101102
"""
103+
if isinstance(resource, dict):
104+
resource = Resource(self.configuration, resource)
102105
if isinstance(resource, Resource):
103106
if 'package_id' in resource:
104107
raise HDXError("Resource %s being added already has a dataset id!" % (resource['name']))
105108
self._addupdate_hdxobject(self.resources, 'name', self._underlying_object, resource)
106109
return
107-
if isinstance(resource, dict):
108-
self._addupdate_hdxobject(self.resources, 'name', Resource, resource)
109-
return
110110
raise HDXError("Type %s cannot be added as a resource!" % type(resource).__name__)
111111

112112
def add_update_resources(self, resources: List[Any]) -> None:
@@ -164,14 +164,13 @@ def add_update_galleryitem(self, galleryitem) -> None:
164164
None
165165
166166
"""
167+
if isinstance(galleryitem, dict):
168+
galleryitem = GalleryItem(self.configuration, galleryitem)
167169
if isinstance(galleryitem, GalleryItem):
168170
if 'dataset_id' in galleryitem:
169171
raise HDXError("Gallery item %s being added already has a dataset id!" % (galleryitem['name']))
170172
self._addupdate_hdxobject(self.gallery, 'title', self._underlying_object, galleryitem)
171173
return
172-
if isinstance(galleryitem, dict):
173-
self._addupdate_hdxobject(self.gallery, 'title', GalleryItem, galleryitem)
174-
return
175174
raise HDXError("Type %s cannot be added as a gallery item!" % type(galleryitem).__name__)
176175

177176
def add_update_gallery(self, gallery: List[Any]):
@@ -430,20 +429,30 @@ def delete_from_hdx(self) -> None:
430429
self._delete_from_hdx('dataset', 'id')
431430

432431
@staticmethod
433-
def search_in_hdx(configuration: Configuration, query: str) -> List['Dataset']:
432+
def search_in_hdx(configuration: Configuration, query: str, **kwargs) -> List['Dataset']:
434433
"""Searches for datasets in HDX
435434
436435
Args:
437436
configuration (Configuration): HDX Configuration
438-
query (str): Query
437+
query (str): Query (in Solr format). Defaults to '*:*'.
438+
**kwargs: See below
439+
fq (string): Any filter queries to apply
440+
sort (string): Sorting of the search results. Defaults to 'relevance asc, metadata_modified desc'.
441+
rows (int): Number of matching rows to return
442+
start (int): Offset in the complete result for where the set of returned datasets should begin
443+
facet (string): Whether to enable faceted results. Defaults to True.
444+
facet.mincount (int): Minimum counts for facet fields should be included in the results
445+
facet.limit (int): Maximum number of values the facet fields return (-1 = unlimited). Defaults to 50.
446+
facet.field (List[str]): Fields to facet upon. Default is empty.
447+
use_default_schema (bool): Use default package schema instead of custom schema. Defaults to False.
439448
440449
Returns:
441450
List[Dataset]: List of datasets resulting from query
442451
"""
443452

444453
datasets = []
445454
dataset = Dataset(configuration)
446-
success, result = dataset._read_from_hdx('dataset', query, 'q')
455+
success, result = dataset._read_from_hdx('dataset', query, 'q', **kwargs)
447456
if result:
448457
count = result.get('count', None)
449458
if count:

hdx/data/hdxobject.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,15 @@ def update_from_json(self, path: str):
8484

8585
def _read_from_hdx(self, object_type: str, value: str, fieldname: Optional[str] = 'id',
8686
action: Optional[str] = None,
87-
other_fields: dict = {}) -> Union[Tuple[bool, dict], Tuple[bool, str]]:
87+
**kwargs) -> Union[Tuple[bool, dict], Tuple[bool, str]]:
8888
"""Makes a read call to HDX passing in given parameter.
8989
9090
Args:
9191
object_type (str): Description of HDX object type (for messages)
9292
value (str): Value of HDX field
9393
fieldname (Optional[str]): HDX field name. Defaults to id.
9494
action (Optional[str]): Replacement CKAN action url to use. Defaults to None.
95-
other_fields (dict): Other fields to pass to CKAN. Defaults to empty dict.
95+
**kwargs: Other fields to pass to CKAN.
9696
9797
Returns:
9898
(bool, dict/str): (True/False, HDX object metadata/Error)
@@ -105,7 +105,7 @@ def _read_from_hdx(self, object_type: str, value: str, fieldname: Optional[str]
105105
else:
106106
action = self.actions()['show']
107107
data = {fieldname: value}
108-
data.update(other_fields)
108+
data.update(kwargs)
109109
try:
110110
result = self.hdxpostsite.call_action(action, data,
111111
requests_kwargs={'auth': self.configuration._get_credentials()})
@@ -394,7 +394,8 @@ def _separate_hdxobjects(self, hdxobjects: List[HDXObjectUpperBound], hdxobjects
394394
Returns:
395395
None
396396
"""
397-
new_hdxobjects = self.data.get(hdxobjects_name, None)
397+
new_hdxobjects = self.data.get(hdxobjects_name, list())
398+
""":type : List[HDXObjectUpperBound]"""
398399
if new_hdxobjects:
399400
hdxobject_names = set()
400401
for hdxobject in hdxobjects:

hdx/data/resource.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,16 @@ def delete_from_hdx(self) -> None:
121121
self._delete_from_hdx('resource', 'id')
122122

123123
@staticmethod
124-
def search_in_hdx(configuration: Configuration, query: str) -> List['Resource']:
124+
def search_in_hdx(configuration: Configuration, query: str, **kwargs) -> List['Resource']:
125125
"""Searches for resources in HDX
126126
127127
Args:
128128
configuration (Configuration): HDX Configuration
129129
query (str): Query
130-
130+
**kwargs: See below
131+
order_by (str): A field on the Resource model that orders the results
132+
offset (int): Apply an offset to the query
133+
limit (int): Apply a limit to the query
131134
Returns:
132135
List[Resource]: List of resources resulting from query
133136
"""
@@ -153,7 +156,7 @@ def delete_datastore(self) -> None:
153156
"""
154157
success, result = self._read_from_hdx('datastore', self.data['id'], 'resource_id',
155158
self.actions()['datastore_delete'],
156-
{'force': True})
159+
force=True)
157160
if not success:
158161
logger.debug(result)
159162

@@ -176,6 +179,7 @@ def create_datastore(self, schema: List[dict], primary_key: Optional[str] = None
176179

177180
data = {'resource_id': self.data['id'], 'force': True, 'fields': schema, 'primary_key': primary_key}
178181
self._write_to_hdx('datastore_create', data, 'id')
182+
f = None
179183
try:
180184
f = open(path, 'r')
181185
reader = csv.DictReader(f)
@@ -192,8 +196,9 @@ def create_datastore(self, schema: List[dict], primary_key: Optional[str] = None
192196
except Exception as e:
193197
raise HDXError('Upload to datastore of %s failed!' % url) from e
194198
finally:
195-
f.close()
196-
os.unlink(path)
199+
if f:
200+
f.close()
201+
os.unlink(path)
197202

198203
def create_datastore_from_dict_schema(self, data: dict) -> None:
199204
"""Creates a resource in the HDX datastore from a YAML file containing a list of fields and types of

hdx/utilities/dictionary.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,22 @@
33
"""Dict utilities"""
44
from collections import UserDict
55

6-
from typing import List, Optional
6+
from typing import List, Optional, TypeVar
77

8+
DictUpperBound = TypeVar('T', bound='dict')
89

9-
def merge_two_dictionaries(a: dict, b: dict) -> dict:
10+
11+
def merge_two_dictionaries(a: DictUpperBound, b: DictUpperBound) -> DictUpperBound:
1012
"""Merges b into a and returns merged result
1113
1214
NOTE: tuples and arbitrary objects are not handled as it is totally ambiguous what should happen
1315
1416
Args:
15-
a (dict): dictionary to merge into
16-
b: (dict): dictionary to merge from
17+
a (DictUpperBound): dictionary to merge into
18+
b: (DictUpperBound): dictionary to merge from
1719
1820
Returns:
19-
dict: Merged dictionary
21+
DictUpperBound: Merged dictionary
2022
"""
2123
key = None
2224
# ## debug output

hdx/utilities/downloader.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def download_file(url: str, path: Optional[str] = None) -> str:
2929
raise DownloadError('Download of %s failed in setup of stream!' % url) from e
3030
if r.status_code != 200:
3131
raise DownloadError('Download of %s failed in setup of stream!' % url)
32+
f = None
3233
try:
3334
if path:
3435
f = open(path, 'wb')
@@ -42,7 +43,8 @@ def download_file(url: str, path: Optional[str] = None) -> str:
4243
except Exception as e:
4344
raise DownloadError('Download of %s failed in retrieval of stream!' % url) from e
4445
finally:
45-
f.close()
46+
if f:
47+
f.close()
4648

4749

4850
def get_headers(url: str, timeout: Optional[float] = None) -> dict:

hdx/utilities/loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import json
77
import os
88
import sys
9+
from typing import List, Any, Optional
910

1011
import yaml
11-
from typing import List, Any, Optional
1212

1313
from .dictionary import merge_two_dictionaries, merge_dictionaries
1414

requirements.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
ckanapi==3.6
2-
colorlog==2.6.3
3-
ndg-httpsclient==0.4.0
2+
colorlog==2.7.0
3+
ndg-httpsclient==0.4.2
44
pyasn1==0.1.9
5-
pyOpenSSL==16.0.0
6-
pyaml==15.8.2
7-
requests==2.9.1
5+
pyOpenSSL==16.2.0
6+
pyaml == 16.9.0
7+
requests==2.11.1
88
scraperwiki==0.5.1
9-
typing==3.5.1
9+
typing==3.5.2.2

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
setup(
1515
name='hdx-python-api',
16-
version='0.5',
16+
version='0.51',
1717
packages=find_packages(exclude=['ez_setup', 'tests', 'tests.*']),
1818
url='http://data.humdata.org/',
1919
license='PSF',

0 commit comments

Comments
 (0)