Skip to content

Commit cffb7d7

Browse files
committed
Expose full search functionality of CKAN and fix warnings
1 parent c32705a commit cffb7d7

13 files changed

Lines changed: 261 additions & 218 deletions

File tree

.idea/workspace.xml

Lines changed: 199 additions & 178 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,11 @@ You can read an existing HDX object with the static `read_from_hdx` method whi
219219

220220
You can search for datasets and resources in HDX using the `search_in_hdx` method which takes a configuration and a query parameter and returns a list of objects of the appropriate HDX object type eg. `list[Dataset]` eg.
221221

222-
datasets = Dataset.search_in_hdx(configuration, 'QUERY')
222+
datasets = Dataset.search_in_hdx(configuration, 'QUERY', **kwargs)
223223

224-
The query parameter takes a different format depending upon whether it is for a [dataset](http://lucene.apache.org/core/3_6_0/queryparsersyntax.html) or a [resource](http://docs.ckan.org/en/ckan-2.3.4/api/index.html#ckan.logic.action.get.resource_search).
224+
The query parameter takes a different format depending upon whether it is for a [dataset](http://lucene.apache.org/core/3_6_0/queryparsersyntax.html) or a [resource](http://docs.ckan.org/en/ckan-2.3.4/api/index.html#ckan.logic.action.get.resource_search).
225+
226+
Various additional arguments (`**kwargs`) can be supplied. These are detailed in the API documentation.
225227

226228
You can create an HDX Object, such as a dataset, resource or gallery item by calling the constructor with a configuration, which is required, and an optional dictionary containing metadata. For example:
227229

hdx/data/dataset.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def init_resources(self) -> None:
8989
None
9090
"""
9191
self.resources = list()
92+
""":type : List[Resource]"""
9293

9394
def add_update_resource(self, resource: Any) -> None:
9495
"""Add new or update existing resource in dataset with new metadata
@@ -99,14 +100,13 @@ def add_update_resource(self, resource: Any) -> None:
99100
Returns:
100101
None
101102
"""
103+
if isinstance(resource, dict):
104+
resource = Resource(self.configuration, resource)
102105
if isinstance(resource, Resource):
103106
if 'package_id' in resource:
104107
raise HDXError("Resource %s being added already has a dataset id!" % (resource['name']))
105108
self._addupdate_hdxobject(self.resources, 'name', self._underlying_object, resource)
106109
return
107-
if isinstance(resource, dict):
108-
self._addupdate_hdxobject(self.resources, 'name', Resource, resource)
109-
return
110110
raise HDXError("Type %s cannot be added as a resource!" % type(resource).__name__)
111111

112112
def add_update_resources(self, resources: List[Any]) -> None:
@@ -164,14 +164,13 @@ def add_update_galleryitem(self, galleryitem) -> None:
164164
None
165165
166166
"""
167+
if isinstance(galleryitem, dict):
168+
galleryitem = GalleryItem(self.configuration, galleryitem)
167169
if isinstance(galleryitem, GalleryItem):
168170
if 'dataset_id' in galleryitem:
169171
raise HDXError("Gallery item %s being added already has a dataset id!" % (galleryitem['name']))
170172
self._addupdate_hdxobject(self.gallery, 'title', self._underlying_object, galleryitem)
171173
return
172-
if isinstance(galleryitem, dict):
173-
self._addupdate_hdxobject(self.gallery, 'title', GalleryItem, galleryitem)
174-
return
175174
raise HDXError("Type %s cannot be added as a gallery item!" % type(galleryitem).__name__)
176175

177176
def add_update_gallery(self, gallery: List[Any]):
@@ -430,20 +429,30 @@ def delete_from_hdx(self) -> None:
430429
self._delete_from_hdx('dataset', 'id')
431430

432431
@staticmethod
433-
def search_in_hdx(configuration: Configuration, query: str) -> List['Dataset']:
432+
def search_in_hdx(configuration: Configuration, query: str, **kwargs) -> List['Dataset']:
434433
"""Searches for datasets in HDX
435434
436435
Args:
437436
configuration (Configuration): HDX Configuration
438-
query (str): Query
437+
query (str): Query (in Solr format). Defaults to '*:*'.
438+
**kwargs: See below
439+
fq (string): Any filter queries to apply
440+
sort (string): Sorting of the search results. Defaults to 'relevance asc, metadata_modified desc'.
441+
rows (int): Number of matching rows to return
442+
start (int): Offset in the complete result for where the set of returned datasets should begin
443+
facet (string): Whether to enable faceted results. Defaults to True.
444+
facet.mincount (int): Minimum counts for facet fields should be included in the results
445+
facet.limit (int): Maximum number of values the facet fields return (-1 = unlimited). Defaults to 50.
446+
facet.field (List[str]): Fields to facet upon. Default is empty.
447+
use_default_schema (bool): Use default package schema instead of custom schema. Defaults to False.
439448
440449
Returns:
441450
List[Dataset]: List of datasets resulting from query
442451
"""
443452

444453
datasets = []
445454
dataset = Dataset(configuration)
446-
success, result = dataset._read_from_hdx('dataset', query, 'q')
455+
success, result = dataset._read_from_hdx('dataset', query, 'q', **kwargs)
447456
if result:
448457
count = result.get('count', None)
449458
if count:

hdx/data/hdxobject.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,15 @@ def update_from_json(self, path: str):
8484

8585
def _read_from_hdx(self, object_type: str, value: str, fieldname: Optional[str] = 'id',
8686
action: Optional[str] = None,
87-
other_fields: dict = {}) -> Union[Tuple[bool, dict], Tuple[bool, str]]:
87+
**kwargs) -> Union[Tuple[bool, dict], Tuple[bool, str]]:
8888
"""Makes a read call to HDX passing in given parameter.
8989
9090
Args:
9191
object_type (str): Description of HDX object type (for messages)
9292
value (str): Value of HDX field
9393
fieldname (Optional[str]): HDX field name. Defaults to id.
9494
action (Optional[str]): Replacement CKAN action url to use. Defaults to None.
95-
other_fields (dict): Other fields to pass to CKAN. Defaults to empty dict.
95+
**kwargs: Other fields to pass to CKAN.
9696
9797
Returns:
9898
(bool, dict/str): (True/False, HDX object metadata/Error)
@@ -105,7 +105,7 @@ def _read_from_hdx(self, object_type: str, value: str, fieldname: Optional[str]
105105
else:
106106
action = self.actions()['show']
107107
data = {fieldname: value}
108-
data.update(other_fields)
108+
data.update(kwargs)
109109
try:
110110
result = self.hdxpostsite.call_action(action, data,
111111
requests_kwargs={'auth': self.configuration._get_credentials()})
@@ -394,7 +394,8 @@ def _separate_hdxobjects(self, hdxobjects: List[HDXObjectUpperBound], hdxobjects
394394
Returns:
395395
None
396396
"""
397-
new_hdxobjects = self.data.get(hdxobjects_name, None)
397+
new_hdxobjects = self.data.get(hdxobjects_name, list())
398+
""":type : List[HDXObjectUpperBound]"""
398399
if new_hdxobjects:
399400
hdxobject_names = set()
400401
for hdxobject in hdxobjects:

hdx/data/resource.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,16 @@ def delete_from_hdx(self) -> None:
121121
self._delete_from_hdx('resource', 'id')
122122

123123
@staticmethod
124-
def search_in_hdx(configuration: Configuration, query: str) -> List['Resource']:
124+
def search_in_hdx(configuration: Configuration, query: str, **kwargs) -> List['Resource']:
125125
"""Searches for resources in HDX
126126
127127
Args:
128128
configuration (Configuration): HDX Configuration
129129
query (str): Query
130-
130+
**kwargs: See below
131+
order_by (str): A field on the Resource model that orders the results
132+
offset (int): Apply an offset to the query
133+
limit (int): Apply a limit to the query
131134
Returns:
132135
List[Resource]: List of resources resulting from query
133136
"""
@@ -153,7 +156,7 @@ def delete_datastore(self) -> None:
153156
"""
154157
success, result = self._read_from_hdx('datastore', self.data['id'], 'resource_id',
155158
self.actions()['datastore_delete'],
156-
{'force': True})
159+
force=True)
157160
if not success:
158161
logger.debug(result)
159162

@@ -176,6 +179,7 @@ def create_datastore(self, schema: List[dict], primary_key: Optional[str] = None
176179

177180
data = {'resource_id': self.data['id'], 'force': True, 'fields': schema, 'primary_key': primary_key}
178181
self._write_to_hdx('datastore_create', data, 'id')
182+
f = None
179183
try:
180184
f = open(path, 'r')
181185
reader = csv.DictReader(f)
@@ -192,8 +196,9 @@ def create_datastore(self, schema: List[dict], primary_key: Optional[str] = None
192196
except Exception as e:
193197
raise HDXError('Upload to datastore of %s failed!' % url) from e
194198
finally:
195-
f.close()
196-
os.unlink(path)
199+
if f:
200+
f.close()
201+
os.unlink(path)
197202

198203
def create_datastore_from_dict_schema(self, data: dict) -> None:
199204
"""Creates a resource in the HDX datastore from a YAML file containing a list of fields and types of

hdx/utilities/dictionary.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,22 @@
33
"""Dict utilities"""
44
from collections import UserDict
55

6-
from typing import List, Optional
6+
from typing import List, Optional, TypeVar
77

8+
DictUpperBound = TypeVar('T', bound='dict')
89

9-
def merge_two_dictionaries(a: dict, b: dict) -> dict:
10+
11+
def merge_two_dictionaries(a: DictUpperBound, b: DictUpperBound) -> DictUpperBound:
1012
"""Merges b into a and returns merged result
1113
1214
NOTE: tuples and arbitrary objects are not handled as it is totally ambiguous what should happen
1315
1416
Args:
15-
a (dict): dictionary to merge into
16-
b: (dict): dictionary to merge from
17+
a (DictUpperBound): dictionary to merge into
18+
b: (DictUpperBound): dictionary to merge from
1719
1820
Returns:
19-
dict: Merged dictionary
21+
DictUpperBound: Merged dictionary
2022
"""
2123
key = None
2224
# ## debug output

hdx/utilities/downloader.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def download_file(url: str, path: Optional[str] = None) -> str:
2929
raise DownloadError('Download of %s failed in setup of stream!' % url) from e
3030
if r.status_code != 200:
3131
raise DownloadError('Download of %s failed in setup of stream!' % url)
32+
f = None
3233
try:
3334
if path:
3435
f = open(path, 'wb')
@@ -42,7 +43,8 @@ def download_file(url: str, path: Optional[str] = None) -> str:
4243
except Exception as e:
4344
raise DownloadError('Download of %s failed in retrieval of stream!' % url) from e
4445
finally:
45-
f.close()
46+
if f:
47+
f.close()
4648

4749

4850
def get_headers(url: str, timeout: Optional[float] = None) -> dict:

hdx/utilities/loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import json
77
import os
88
import sys
9+
from typing import List, Any, Optional
910

1011
import yaml
11-
from typing import List, Any, Optional
1212

1313
from .dictionary import merge_two_dictionaries, merge_dictionaries
1414

requirements.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
ckanapi==3.6
2-
colorlog==2.6.3
3-
ndg-httpsclient==0.4.0
2+
colorlog==2.7.0
3+
ndg-httpsclient==0.4.2
44
pyasn1==0.1.9
5-
pyOpenSSL==16.0.0
6-
pyaml==15.8.2
7-
requests==2.9.1
5+
pyOpenSSL==16.2.0
6+
pyaml == 16.9.0
7+
requests==2.11.1
88
scraperwiki==0.5.1
9-
typing==3.5.1
9+
typing==3.5.2.2

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
setup(
1515
name='hdx-python-api',
16-
version='0.5',
16+
version='0.51',
1717
packages=find_packages(exclude=['ez_setup', 'tests', 'tests.*']),
1818
url='http://data.humdata.org/',
1919
license='PSF',

0 commit comments

Comments
 (0)