Skip to content

Commit 3a88090

Browse files
authored
Merge pull request #4 from metaodi/data-loader
Add pagination support
2 parents b31c1a0 + f98ba90 commit 3a88090

13 files changed

Lines changed: 2573 additions & 66 deletions

examples/download_attachent.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,31 @@
22
import museumpy
33
from dotenv import load_dotenv, find_dotenv
44
import os
5+
from pprint import pprint
6+
import tempfile
57

68
load_dotenv(find_dotenv())
79
user = os.getenv('MP_USER')
810
pw = os.getenv('MP_PASS')
911

10-
client = museumpy.client(
12+
client = museumpy.MuseumPlusClient(
1113
base_url='https://mpzurichrietberg.zetcom.com/MpWeb-mpZurichRietberg',
1214
requests_kwargs={'auth': (user, pw)},
1315
)
1416

15-
group_result = client.search('ObjObjectGroupTxt', 'MyGroup')
17+
group_result = client.search(
18+
field='OgrNameTxt',
19+
value='Patolu, MAP',
20+
module='ObjectGroup'
21+
)
1622
group = group_result[0]['raw']
17-
ref = group['moduleIm']['moduleReference']
23+
ref = group['moduleItem']['moduleReference']
24+
1825

19-
for ref_item in ref['moduleReferenceItem']:
26+
for ref_item in ref['moduleReferenceItem'][:5]:
2027
item = client.module_item(ref_item['moduleItemId'], ref['targetModule'])
28+
pprint(item, depth=1)
2129
if item['hasAttachments'] == 'true':
22-
attachment_path = client.download_attachment(ref_item['moduleItemId'], ref['targetModule'], 'files')
23-
print(f"Attachment downloaded and saved at {attachment_path}")
30+
with tempfile.TemporaryDirectory() as tmpdir:
31+
attachment_path = client.download_attachment(ref_item['moduleItemId'], ref['targetModule'], tmpdir)
32+
print(f"Attachment downloaded and saved at {attachment_path}")

examples/pagination.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import museumpy
2+
from dotenv import load_dotenv, find_dotenv
3+
from pprint import pprint
4+
import os
5+
6+
load_dotenv(find_dotenv())
7+
user = os.getenv('MP_USER')
8+
pw = os.getenv('MP_PASS')
9+
10+
11+
client = museumpy.MuseumPlusClient(
12+
base_url='https://mpzurichrietberg.zetcom.com/MpWeb-mpZurichRietberg',
13+
requests_kwargs={'auth': (user, pw)}
14+
)
15+
16+
result = client.fulltext_search(
17+
query='Patolu',
18+
limit=2
19+
)
20+
21+
print(result)
22+
print(result.count)
23+
for rec in result[:5]:
24+
pprint(rec, depth=1)
25+
print(result)

examples/simple_search.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@
1515
)
1616

1717
pprint(records)
18-
print(len(records))
18+
print(records.count)
1919
pprint(records[0], depth=1)

museumpy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
__version__ = '0.0.3'
22
__all__ = ['client', 'errors', 'response', 'xmlparse']
33

4-
from .errors import MuseumPlusError # noqa
4+
from .errors import MuseumpyError # noqa
55
from .client import MuseumPlusClient
66

77
def fulltext_search(base_url, query, **kwargs): # noqa

museumpy/client.py

Lines changed: 67 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -45,61 +45,61 @@ def __init__(self, base_url=None, map_function=None, requests_kwargs=None):
4545

4646
def fulltext_search(self, query, module='Object', limit=100, offset=0):
4747
url = f"{self.base_url}/ria-ws/application/module/{module}/search"
48-
data = FULLTEXT_TEMPLATE.format(
49-
module_name=module,
50-
limit=limit,
51-
offset=offset,
52-
query=query,
53-
)
54-
xml = data.encode("utf-8")
55-
xml_response = self._post_xml(url, xml)
56-
return response.SearchResponse(xml_response, self.map_function)
48+
params = {
49+
'module_name': module,
50+
'query': query,
51+
}
52+
data_loader = DataPoster(url, params, FULLTEXT_TEMPLATE, self.requests_kwargs)
53+
return response.SearchResponse(data_loader, limit, offset, self.map_function)
5754

5855
def search(self, field, value, module='Object', limit=100, offset=0):
5956
url = f"{self.base_url}/ria-ws/application/module/{module}/search"
60-
data = SEARCH_TEMPLATE.format(
61-
module_name=module,
62-
limit=limit,
63-
offset=offset,
64-
field=field,
65-
value=value,
66-
)
67-
xml = data.encode("utf-8")
68-
xml_response = self._post_xml(url, xml)
69-
return response.SearchResponse(xml_response, self.map_function)
57+
params = {
58+
'module_name': module,
59+
'field': field,
60+
'value': value,
61+
}
62+
data_loader = DataPoster(url, params, SEARCH_TEMPLATE, self.requests_kwargs)
63+
return response.SearchResponse(data_loader, limit, offset, self.map_function)
7064

7165
def module_item(self, id, module='Object'):
7266
url = f"{self.base_url}/ria-ws/application/module/{module}/{id}"
73-
xml_response = self._get_xml(url)
74-
resp = response.SearchResponse(xml_response)
75-
if len(resp) == 1:
67+
data_loader = DataLoader(url, self.requests_kwargs)
68+
resp = response.SearchResponse(data_loader)
69+
if resp.count == 1:
7670
return resp[0]
7771
return resp
7872

7973
def download_attachment(self, id, module='Object', dir='.'):
8074
url = f"{self.base_url}/ria-ws/application/module/{module}/{id}/attachment"
81-
return self._download_file(url, dir)
75+
data_loader = DataLoader(url, self.requests_kwargs)
76+
return data_loader.download_file(url, dir)
8277

83-
def _download_file(self, url, dir):
84-
headers = {'Accept': 'application/octet-stream'}
85-
res = self._get_content(url, headers)
86-
d = res.headers.get('Content-Disposition')
87-
fname = re.findall("filename=(.+)", d)[0]
88-
assert fname, "Could not find filename in Content-Disposition header"
89-
path = os.path.join(dir, fname)
90-
with open(path, 'wb') as f:
91-
for chunk in res.iter_content(1024):
92-
f.write(chunk)
93-
return path
9478

95-
def _get_xml(self, url):
96-
res = self._get_content(url)
79+
class DataPoster(object):
80+
def __init__(self, url, params=None, template=None, requests_kwargs=None):
81+
self.session = requests.Session()
82+
self.url = url
83+
self.params = params
84+
self.template = template
85+
self.xmlparser = xmlparse.XMLParser()
86+
self.requests_kwargs = requests_kwargs or {}
87+
88+
def load(self, **kwargs):
    """Render the request template and POST it to ``self.url``.

    Keyword arguments (for example ``limit`` and ``offset``) are merged
    into ``self.params`` and remain there for subsequent calls. The merged
    mapping is substituted into ``self.template`` with ``str.format`` and
    the result is sent UTF-8 encoded.

    Returns:
        Whatever ``self._post_xml`` returns for the rendered request.
    """
    # ``params`` defaults to None in the constructor; start from an empty
    # mapping instead of failing on ``None.update(...)``.
    if self.params is None:
        self.params = {}
    self.params.update(kwargs)
    # NOTE(review): values are substituted into the XML verbatim; a search
    # value containing ``&`` or ``<`` presumably needs escaping -- confirm
    # against the templates.
    xml = self.template.format(**self.params).encode('utf-8')
    return self._post_xml(self.url, xml)
92+
93+
def _post_xml(self, url, xml):
94+
headers = {'Content-Type': 'application/xml'}
95+
res = self._post_content(url, xml, headers)
9796
return self.xmlparser.parse(res.content)
9897

99-
def _get_content(self, url, headers={}):
98+
def _post_content(self, url, data, headers):
10099
try:
101-
res = self.session.get(
100+
res = self.session.post(
102101
url,
102+
data=data,
103103
headers=headers,
104104
**self.requests_kwargs
105105
)
@@ -111,16 +111,38 @@ def _get_content(self, url, headers={}):
111111

112112
return res
113113

114-
def _post_xml(self, url, xml):
115-
headers = {'Content-Type': 'application/xml'}
116-
res = self._post_content(url, xml, headers)
114+
115+
class DataLoader(object):
116+
def __init__(self, url, requests_kwargs=None):
117+
self.session = requests.Session()
118+
self.url = url
119+
self.xmlparser = xmlparse.XMLParser()
120+
self.requests_kwargs = requests_kwargs or {}
121+
122+
def load(self, **kwargs):
123+
xml = self._get_xml(self.url)
124+
return xml
125+
126+
def download_file(self, url, dir):
    """Download the resource at ``url`` and store it inside ``dir``.

    The file name is taken from the ``filename`` parameter of the
    ``Content-Disposition`` response header. Surrounding quotes and any
    trailing header parameters are dropped, and only the final path
    component is kept so a server-supplied name cannot point outside
    ``dir``.

    Args:
        url: Address to fetch; passed to ``self._get_content``.
        dir: Existing directory the file is written to.

    Returns:
        The path of the written file.

    Raises:
        errors.MuseumPlusError: If the header is missing or carries no
            usable file name.
    """
    headers = {'Accept': 'application/octet-stream'}
    res = self._get_content(url, headers)

    # The header may be absent altogether; treat that like "no file name".
    disposition = res.headers.get('Content-Disposition') or ''
    # Accept both the quoted form (filename="a b.jpg") and the bare token
    # form (filename=a.jpg; size=3).
    match = re.search(r'filename=(?:"([^"]*)"|([^;]+))', disposition)
    raw_name = (match.group(1) or match.group(2) or '') if match else ''
    fname = os.path.basename(raw_name.strip().replace('\\', '/'))
    if not fname:
        raise errors.MuseumPlusError(
            "Could not find filename in Content-Disposition header"
        )

    path = os.path.join(dir, fname)
    with open(path, 'wb') as f:
        for chunk in res.iter_content(1024):
            f.write(chunk)
    return path
137+
138+
def _get_xml(self, url):
139+
res = self._get_content(url)
117140
return self.xmlparser.parse(res.content)
118141

119-
def _post_content(self, url, data, headers):
142+
def _get_content(self, url, headers={}):
120143
try:
121-
res = self.session.post(
144+
res = self.session.get(
122145
url,
123-
data=data,
124146
headers=headers,
125147
**self.requests_kwargs
126148
)
@@ -129,4 +151,5 @@ def _post_content(self, url, data, headers):
129151
raise errors.MuseumPlusError("HTTP error: %s" % e)
130152
except requests.exceptions.RequestException as e:
131153
raise errors.MuseumPlusError("Request error: %s" % e)
154+
132155
return res

museumpy/errors.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,23 @@
1-
class MuseumPlusError(Exception):
1+
class MuseumpyError(Exception):
22
"""
33
General MuseumPlus error class to provide a superclass for all other errors
44
"""
55

66

7-
class XMLParsingError(MuseumPlusError):
7+
class MuseumPlusError(MuseumpyError):
8+
"""
9+
MuseumPlus error raised when an error with the communication with MuseumPlus occurs
10+
"""
11+
12+
13+
class XMLParsingError(MuseumpyError):
814
"""
915
The error raised when parsing the XML.
1016
"""
17+
18+
19+
class NoMoreRecordsError(MuseumpyError):
20+
"""
21+
This error is raised if all records have been loaded (or no records are
22+
present)
23+
"""

museumpy/response.py

Lines changed: 70 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,42 @@
11
# -*- coding: utf-8 -*-
22

33
from . import xmlparse
4+
from . import errors
45

56
ZETCOM_NS = "http://www.zetcom.com/ria/ws/module"
67

78

89
class SearchResponse(object):
9-
def __init__(self, xml_response, map_function=None):
10+
def __init__(self, data_loader, limit=100, offset=0, map_function=None):
11+
self.data_loader = data_loader
1012
self.xmlparser = xmlparse.XMLParser()
1113
self.records = []
12-
self._extract_records(xml_response, map_function)
14+
self.count = 0
15+
self.limit = limit
16+
self.offset = offset
17+
self.map_function = map_function
1318

14-
def _extract_records(self, xml, map_function):
19+
xml = data_loader.load(limit=limit, offset=offset)
20+
self._parse_content(xml)
21+
22+
def _parse_content(self, xml):
23+
self.count = self.maybe_int(self.xmlparser.find(xml, f'.//{{{ZETCOM_NS}}}module').attrib['totalSize']) # noqa
24+
self._extract_records(xml)
25+
26+
def maybe_int(self, s):
    """Convert ``s`` to ``int`` when possible; otherwise return ``s`` as is."""
    try:
        converted = int(s)
    except (TypeError, ValueError):
        # not a number-like value: hand the input back unchanged
        converted = s
    return converted
31+
32+
def _extract_records(self, xml):
1533
new_records = []
1634
xml_recs = self.xmlparser.findall(xml, f'.//{{{ZETCOM_NS}}}module/{{{ZETCOM_NS}}}moduleItem') # noqa
1735
for xml_rec in xml_recs:
1836
record = self._map_xml(xml_rec)
1937
record['raw'] = self.xmlparser.todict(xml_rec, xml_attribs=True)
20-
if map_function:
21-
record = map_function(record, xml_rec)
38+
if self.map_function:
39+
record = self.map_function(record, xml_rec)
2240
new_records.append(record)
2341
self.records.extend(new_records)
2442

@@ -93,18 +111,60 @@ def __repr__(self):
93111
try:
94112
return (
95113
'SearchResponse('
96-
'count=%r)'
114+
'count=%r,'
115+
'limit=%r,'
116+
'offset=%r)'
97117
) % (
98-
len(self.records),
118+
self.count,
119+
self.limit,
120+
self.offset
99121
)
100122
except AttributeError:
101123
return 'SearchResponse(empty)'
102124

103-
def __len__(self):
104-
return len(self.records)
125+
def __length_hint__(self):
126+
return self.count
105127

106128
def __iter__(self):
107-
yield from self.records
129+
# use while loop since self.records could grow while iterating
130+
i = 0
131+
while True:
132+
# load new data when near end
133+
if i == len(self.records):
134+
try:
135+
self._load_new_data()
136+
except errors.NoMoreRecordsError:
137+
break
138+
yield self.records[i]
139+
i += 1
108140

109141
def __getitem__(self, key):
    """Return the record(s) selected by ``key``, loading pages on demand.

    Args:
        key: An ``int`` index or a ``slice``. Negative indices and slice
            bounds count from the end, which requires loading all
            records first.

    Returns:
        A single record for an ``int`` key, a list of records for a slice.

    Raises:
        TypeError: If ``key`` is neither an ``int`` nor a ``slice``.
        IndexError: If an ``int`` key is out of range after loading.
    """
    if isinstance(key, slice):
        step = 1 if key.step is None else key.step
        # The bound furthest from index 0 decides how much must be
        # loaded: ``stop`` when walking forwards, ``start`` when walking
        # backwards.
        far_end = key.stop if step > 0 else key.start
        counts_from_end = any(
            bound is not None and bound < 0
            for bound in (key.start, key.stop)
        )
        if far_end is None or counts_from_end:
            # open-ended or relative to the end: everything is needed
            last_index = self.count
        elif step > 0:
            # ``stop`` is exclusive, the last index touched is stop - 1
            last_index = far_end - 1
        else:
            last_index = far_end
        self._load_new_data_until(last_index)
        count = len(self.records)
        return [self.records[k] for k in range(*key.indices(count))]

    if not isinstance(key, int):
        raise TypeError("Index must be an integer or slice")

    # if we get a negative index, load all data
    self._load_new_data_until(self.count if key < 0 else key)
    return self.records[key]
157+
158+
def _load_new_data_until(self, limit):
    """Fetch further pages until index ``limit`` exists in ``self.records``.

    Stops early, without raising, once ``_load_new_data`` reports that no
    more records are available.
    """
    try:
        while len(self.records) <= limit:
            self._load_new_data()
    except errors.NoMoreRecordsError:
        # nothing left to fetch; callers work with what has been loaded
        return
164+
165+
def _load_new_data(self):
    """Fetch the next page of records and append it to ``self.records``.

    ``self.offset`` only moves forward once a page has been requested and
    parsed successfully, so a failed request can be retried and repeated
    calls after the end do not keep shifting the offset.

    Raises:
        errors.NoMoreRecordsError: If the next offset lies at or beyond
            ``self.count``, or if the requested page added no records.
    """
    next_offset = self.offset + self.limit
    if next_offset >= self.count:
        raise errors.NoMoreRecordsError("There are no more records")

    xml = self.data_loader.load(limit=self.limit, offset=next_offset)
    loaded_before = len(self.records)
    self._parse_content(xml)
    self.offset = next_offset

    # A page that adds nothing (for example with ``limit=0``) would leave
    # callers waiting for new records forever; report exhaustion instead.
    if len(self.records) == loaded_before:
        raise errors.NoMoreRecordsError("There are no more records")

setup.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ source pyenv/bin/activate
55

66
pip install --upgrade pip
77
pip install -r requirements.txt
8-
pip install .
8+
pip install -e .

tests/client_test.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ class TestClient(MuseumpyTestCase):
1414
def test_simple_search(self):
1515
client = MuseumPlusClient('http://test.com/MpWeb-test')
1616
r = client.search(field='TestField', value='TestValue')
17-
self.assertEqual(len(r), 1)
17+
self.assertEqual(r.__length_hint__(), 1)
18+
self.assertEqual(r.count, 1)
1819

1920
self.assertEqual(r[0]['hasAttachments'], 'true') # noqa
2021
self.assertEqual(r[0]['ObjCreditlineGrp'], 'Geschenk Gisela Müller und Erich Gross') # noqa

0 commit comments

Comments
 (0)