Skip to content

Commit da97967

Browse files
authored
Merge pull request #20 from AASHE/chunk-queries
Chunk queries & subscriptions
2 parents fc0ea31 + e38da84 commit da97967

7 files changed

Lines changed: 152 additions & 35 deletions

File tree

README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,36 @@ This can now be used to make additional calls using the methods included in
3434
the WSDL from MemberSuite. For documentation on available methods and their
3535
usage, see http://api.docs.membersuite.com/
3636

37-
Use request_session() as a model for constructing the headers for
37+
Use request_session() as a model for constructing the headers for
3838
your own functions in your app that follow this method:
3939

4040
1) Call client.construct_concierge_header(url) to generate a new header element, using your method's URL as an argument.
4141
2) Call client.service.method_name(_soapheaders=[concierge_request_header], method arguments)
4242
3) Return any relevant data out of the response object
4343

4444
***IMPORTANT NOTE: In constructing headers, SessionId must appear first.***
45+
46+
## Running tests
47+
48+
To run all tests:
49+
50+
$ nosetests
51+
52+
To run specific tests, load them by module. For example:
53+
54+
$ python -m membersuite_api_client.tests.test_subscriptions
55+
56+
## Contributing and Extending
57+
58+
Looking to contribute? The best place to start is in the code base. Notice how
59+
we created a module for each MemberSuite object, like `organizations`.
60+
61+
Each module contains `services.py` and `models.py` files.
62+
63+
Your "models" are simply python representations of the MemberSuite objects.
64+
65+
Your "services" provide interfaces to those models in MemberSuite. This is
66+
where objects are retrieved from MemberSuite and converted to your models for
67+
use in a Python app. It is recommended that the services be classes; if
68+
you define `result_to_models` and `ms_object_to_model` methods on the class
69+
you can use the ChunkQueryMixin to make large queries.

membersuite_api_client/mixins.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from retrying import retry
2+
3+
4+
class ChunkQueryMixin():
    """
    A mixin for API client service classes that makes it easy to consistently
    request multiple queries from a MemberSuite endpoint.

    Membersuite will often time out on big queries, so this allows us to
    break it up into smaller requests.

    Design assumptions:
    - The service exposes the API client as `self.client`
    - The service defines a `result_to_models` method to "transform"
      the objects returned by the endpoint into a list
    """

    def get_long_query(
            self, base_query, retry_attempts=2, limit_to=200, max_calls=None):
        """
        Takes a base query for all objects and requests them chunk by chunk

        @base_query - the base query to be executed
        @retry_attempts - the number of times to attempt each chunk query
            before giving up (passed to `retry` as stop_max_attempt_number)
        @limit_to - how many rows to query for in each chunk
        @max_calls - maximum number of chunk queries to run; None means
            keep going until a short chunk signals the end of the data

        Returns a single list holding the models from every chunk.
        """

        @retry(stop_max_attempt_number=retry_attempts)
        def run_query(query, start_record, chunk_size):
            # inline function so that each chunk is retried independently
            result = self.client.runSQL(
                query=query,
                start_record=start_record,
                limit_to=chunk_size,
            )
            return self.result_to_models(result)

        start_record = 0
        chunk = run_query(base_query, start_record, limit_to)
        # Copy the first chunk: accumulating into the same list object that
        # is used for the "last chunk" length check would make that check
        # (and the next start offset) reflect the running total instead.
        all_objects = list(chunk)
        call_count = 1

        # continue to run queries as long as we
        #   - don't exceed the allowed number of calls
        #   - got a non-empty chunk (guards against looping on no data)
        #   - got a full chunk (a short chunk means we reached the end)
        while (
                call_count != max_calls and
                chunk and
                len(chunk) >= limit_to):

            start_record += len(chunk)  # should be `limit_to`
            chunk = run_query(base_query, start_record, limit_to)
            all_objects.extend(chunk)
            call_count += 1

        return all_objects

membersuite_api_client/subscriptions/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77

88
class Subscription(object):
99

10-
def __init__(self, id, org, start, end, extra_data={}):
10+
def __init__(self, id, org_id, start, end, extra_data={}):
1111
self.id = id
12-
self.org = org
12+
self.org_id = org_id
1313
self.start = start
1414
self.end = end
1515
self.extra_data = extra_data # all other fields, for reference

membersuite_api_client/subscriptions/services.py

Lines changed: 48 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,62 +4,84 @@
44
http://api.docs.membersuite.com/#References/Objects/Subscription.htm
55
66
@todo
7-
let's define an Organization object, like this Subscription,
8-
for this interface
9-
@todo
10-
confirm owner field is actually the orgnization
11-
@todo
12-
set up fixtures in MemberSuite for integration testing
7+
set up fixtures in MemberSuite sandbox for integration testing
138
@todo
149
add date modified param for performance
15-
@todo
16-
additional method for getting all subscriptions for syncing purposes
1710
"""
1811

1912
from .models import Subscription
13+
from ..exceptions import ExecuteMSQLError
14+
from ..mixins import ChunkQueryMixin
2015
from ..utils import convert_ms_object
2116

17+
import datetime
2218

23-
class SubscriptionService(object):
19+
20+
class SubscriptionService(ChunkQueryMixin, object):
    """
    Retrieves Subscription objects from MemberSuite and converts them
    to `Subscription` models.
    """

    def __init__(self, client):
        """
        Accepts a ConciergeClient to connect with MemberSuite
        """
        self.client = client

    def get_subscriptions(
            self, publication_id=None, org_id=None, since_when=None,
            retry_attempts=2, limit_to=200, max_calls=None):
        """
        Fetches subscriptions from Membersuite, optionally filtered.

        @publication_id - only subscriptions to this publication, if set
        @org_id - only subscriptions owned by this organization, if set
        @since_when - number of days to look back; only subscriptions
            modified after midnight that many days ago, if set
        @retry_attempts, @limit_to, @max_calls - passed straight through
            to `get_long_query`

        Returns a list of Subscription objects.
        """
        # NOTE(review): values are interpolated directly into the query
        # string; callers should only pass trusted identifiers.
        conditions = []
        if org_id:
            conditions.append("owner = '%s'" % org_id)
        if publication_id:
            conditions.append("publication = '%s'" % publication_id)
        if since_when:
            cutoff = datetime.date.today() - datetime.timedelta(
                days=since_when)
            conditions.append(
                "LastModifiedDate > '{since_when} 00:00:00'".format(
                    since_when=cutoff))

        query = "SELECT Objects() FROM Subscription"
        if conditions:
            # Emit WHERE exactly once, whichever filters are present, so a
            # query without org_id never starts its filters with a bare AND.
            query += " WHERE " + " AND ".join(conditions)

        # note, get_long_query is overkill when just looking at
        # one org, but it still only executes once
        # `get_long_query` uses `result_to_models` to return Subscriptions
        return self.get_long_query(
            query, retry_attempts=retry_attempts, limit_to=limit_to,
            max_calls=max_calls)

    def result_to_models(self, result):
        """
        this is the 'transform' part of ETL:
        converts the result of the SQL to Subscription objects

        Returns a list of Subscription objects (empty when the result
        holds no objects); raises ExecuteMSQLError when the result
        reports errors.
        """
        msql_result = result['body']['ExecuteMSQLResult']

        if msql_result['Errors']:
            raise ExecuteMSQLError(result)

        obj_result = msql_result['ResultValue']['ObjectSearchResult']
        if not obj_result['Objects']:
            return []

        objects = obj_result['Objects']['MemberSuiteObject']
        return [self.ms_object_to_model(obj) for obj in objects]

    def ms_object_to_model(self, ms_obj):
        " Converts an individual result to a Subscription Model "
        sane_obj = convert_ms_object(
            ms_obj['Fields']['KeyValueOfstringanyType'])
        return Subscription(
            id=sane_obj['ID'],
            org_id=sane_obj['Owner'],
            start=sane_obj['StartDate'],
            end=sane_obj['TerminationDate'],
            extra_data=sane_obj)

membersuite_api_client/tests/test_subscriptions.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,28 @@ def test_get_org_subscriptions(self):
1515
Get the all subscriptions for an organization
1616
"""
1717

18-
test_org_id = "6faf90e4-0007-c91c-7dc8-0b3c53985743"
19-
subscription_list = self.service.get_org_subscriptions(test_org_id)
20-
self.assertNotEqual(subscription_list, None)
18+
test_org_id = "6faf90e4-0007-cbaa-6232-0b3c7fa70db7"
19+
subscription_list = self.service.get_subscriptions(org_id=test_org_id)
20+
self.assertGreaterEqual(len(subscription_list), 2)
2121

2222
# with publication_id
2323
STARS_PUBLICATION_ID = '6faf90e4-009e-cb9b-7c9e-0b3bcd6dff6a'
24-
subscription_list = self.service.get_org_subscriptions(
25-
test_org_id, publication_id=STARS_PUBLICATION_ID)
26-
self.assertNotEqual(subscription_list, None)
24+
subscription_list = self.service.get_subscriptions(
25+
org_id=test_org_id, publication_id=STARS_PUBLICATION_ID)
26+
self.assertGreaterEqual(len(subscription_list), 2)
27+
28+
# now for a "long query" - querying ALL subscriptions
29+
subscription_list = self.service.get_subscriptions(
30+
retry_attempts=2, limit_to=3, max_calls=3)
31+
self.assertEqual(len(subscription_list), 9)
32+
33+
# test a long query that's longer than the number we have
34+
# to ensure that edge case stops the queries
35+
subscription_list = self.service.get_subscriptions(
36+
retry_attempts=5, limit_to=200, max_calls=6)
37+
self.assertLess(len(subscription_list), 1500)
38+
39+
# @todo: test the modified date
2740

2841

2942
if __name__ == '__main__':

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
future==0.16.0
22
lxml==3.7.0
3+
retrying>=1.3.3
34
zeep==0.23.0

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,6 @@ def read(fname):
2727
],
2828
include_package_data=True,
2929
install_requires=["future==0.16.0",
30+
"retrying>=1.3.3",
3031
"zeep>=0.26"]
3132
)

0 commit comments

Comments
 (0)