Skip to content

Commit f36a0d0

Browse files
authored
Merge pull request #13 from dev-dull/2_basic_stats
Complete rethink of stats and how data is recorded
2 parents b395bb3 + 10a8dc5 commit f36a0d0

2 files changed

Lines changed: 95 additions & 173 deletions

File tree

constfig.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import yaml
22
import logging
33

4+
from ua_parser import parse
45
from base64 import b64decode
56
from collections.abc import Iterable
67

@@ -29,7 +30,6 @@ def set_constants(self):
2930
self.HTTP_METHOD_GET = "GET"
3031
self.HTTP_METHOD_DELETE = "DELETE"
3132

32-
self.HTTP_HEADER_USER_AGENT = "User-Agent"
3333
self.HTTP_HEADER_X_API_KEY = "X-Api-Key"
3434

3535
self.HTTP_MIME_TYPE_PNG = "image/png"
@@ -43,6 +43,31 @@ def set_constants(self):
4343
}
4444
self.LOG = logging.basicConfig(level=self._LOG_LEVELS[self.LOG_LEVEL])
4545

46+
def user_agent_evaluator(user_agent):
    """Turn a Flask user-agent object into a JSON-serializable dict.

    The raw string (``user_agent.string``) is run through ``parse`` and the
    ``device``, ``os`` and ``user_agent`` parts of the result are flattened
    into plain dicts. Any attribute that is missing on the parsed result is
    recorded as ``None``. The raw string itself is kept under ``"string"``.
    """
    raw_string = user_agent.string
    parsed = parse(raw_string)

    # Both the OS and the browser sections expose the same version fields.
    version_fields = ("family", "major", "minor", "patch", "patch_minor")
    layout = (
        ("device", ("brand", "family", "model")),
        ("os", version_fields),
        ("user_agent", version_fields),
    )

    evaluated = {}
    for section, fields in layout:
        part = getattr(parsed, section)
        # getattr with a default keeps a None/partial section from raising.
        evaluated[section] = {field: getattr(part, field, None) for field in fields}
    evaluated["string"] = raw_string
    return evaluated
70+
4671
self.FLASK_REQUEST_KEY_CONTENT_TYPE = "content_type"
4772
self.FLASK_REQUEST_KEY_HEADERS = "headers"
4873
self.FLASK_REQUEST_KEY_REFERRER = "referrer"
@@ -52,10 +77,12 @@ def set_constants(self):
5277
# k,v pair where the key is the name of a property in Flask's request object, and the value is a function that turns the value into a type that
5378
# json.dump() can evaluate for saving to disk.
5479
self.FLASK_REQUEST_KEY_CONTENT_TYPE: lambda content_type: content_type,
55-
self.FLASK_REQUEST_KEY_HEADERS: lambda headers: dict(headers), # Saving headers has the unintended side effect of saving the user agent a second time.
80+
self.FLASK_REQUEST_KEY_HEADERS: lambda headers: dict(
81+
headers
82+
), # Saving headers has the unintended side effect of saving the user agent a second time.
5683
self.FLASK_REQUEST_KEY_REFERRER: lambda referrer: referrer,
5784
self.FLASK_REQUEST_KEY_REMOTE_ADDR: lambda remote_addr: remote_addr,
58-
self.FLASK_REQUEST_KEY_USER_AGENT: lambda user_agent: user_agent.string,
85+
self.FLASK_REQUEST_KEY_USER_AGENT: user_agent_evaluator,
5986
}
6087

6188
def load_config(self):

ddb.py

Lines changed: 65 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,22 @@
22

33
from constfig import C
44
from flask import request
5-
from ua_parser import parse
65
from time import time, sleep
7-
from functools import lru_cache
86
from collections import defaultdict
97

108
## Shape of the data that gets saved to disk:
119
# {
12-
# "registered-test-id": {
13-
# "1746643582.796701": {
10+
# "test": {
11+
# "1751850528.990112": {
1412
# "content_type": null,
1513
# "headers": {
16-
# "Host": "127.0.0.1:5000",
17-
# "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:137.0) Gecko/20100101 Firefox/137.0",
14+
# "Host": "localhost:5000",
15+
# "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:140.0) Gecko/20100101 Firefox/140.0",
1816
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
1917
# "Accept-Language": "en-US,en;q=0.5",
2018
# "Accept-Encoding": "gzip, deflate, br, zstd",
2119
# "Connection": "keep-alive",
22-
# "Cookie": "oc_sessionPassphrase=nnn; ocqov586km8r=nnn",
20+
# "Cookie": "_ssss=2|88888...aaaaaa",
2321
# "Upgrade-Insecure-Requests": "1",
2422
# "Sec-Fetch-Dest": "document",
2523
# "Sec-Fetch-Mode": "navigate",
@@ -31,7 +29,28 @@
3129
# },
3230
# "referrer": null,
3331
# "remote_addr": "127.0.0.1",
34-
# "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:137.0) Gecko/20100101 Firefox/137.0"
32+
# "user_agent": {
33+
# "device": {
34+
# "brand": "Apple",
35+
# "family": "Mac",
36+
# "model": "Mac"
37+
# },
38+
# "os": {
39+
# "family": "Mac OS X",
40+
# "major": "10",
41+
# "minor": "15",
42+
# "patch": null,
43+
# "patch_minor": null
44+
# },
45+
# "user_agent": {
46+
# "family": "Firefox",
47+
# "major": "140",
48+
# "minor": "0",
49+
# "patch": null,
50+
# "patch_minor": null
51+
# },
52+
# "string": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:140.0) Gecko/20100101 Firefox/140.0"
53+
# }
3554
# }
3655
# }
3756
# }
@@ -115,31 +134,34 @@
115134
# ddb: dumb database
116135
class DDB(dict):
117136
def __init__(self, d={}, max_size=10000):
    """Create the top-level store, either from ``d`` or from disk.

    Args:
        d: Optional mapping of id -> dict of records. When non-empty it is
            used instead of the data saved on disk.
        max_size: Size limit handed to every per-id ``_DDB`` that is created.

    Raises:
        TypeError: If any value in ``d`` is not a dict.
    """
    self._max_size = max_size

    if not d:
        # Nothing was handed in, so fall back to whatever load() provides.
        self.load()
        return

    # The user passed in data, so assume we should load that instead of data from disk.
    for key, value in d.items():
        if not isinstance(value, dict):
            raise TypeError(f"Invalid type for value {value} in dictionary. Expected dict, got {type(value)}")
        self[key] = _DDB(value, max_size=max_size)
123148

124-
for k, v in d.items():
125-
if isinstance(v, dict):
126-
self[k] = _DDB(v, max_size=max_size)
127-
else:
128-
raise TypeError(f"Invalid type for value {v} in dictionary. Expected dict, got {type(v)}")
149+
def _get_id(self):
    """Return the ``id`` query-string argument of the current Flask request.

    Returns ``None`` when the request carries no ``id`` argument.
    """
    query_args = request.args
    return query_args.get("id")
129151

130152
def register(self):
    """Register the id of the current request with a fresh, empty ``_DDB``.

    Raises:
        KeyError: If the id is already registered.
    """
    new_id = self._get_id()
    if new_id in self:
        raise KeyError(f"ID {new_id} already registered")

    # Store through the parent class's __setitem__ rather than this class's own.
    empty_store = _DDB(max_size=self._max_size)
    super().__setitem__(new_id, empty_store)
135157

136158
def unregister(self):
    """Remove the id of the current request and return its stored data.

    Returns ``None`` (and changes nothing) when the id is not registered.
    """
    target = self._get_id()
    if target not in self:
        return None
    return self.pop(target)
140162

141163
def __call__(self):
142-
id = request.args.get("id")
164+
id = self._get_id()
143165
data_set = {}
144166
for flask_request_key, serializer in C.FLASK_REQUEST_SERIALIZERS.items():
145167
data_set[flask_request_key] = serializer(getattr(request, flask_request_key, None))
@@ -180,124 +202,29 @@ def load(self, filename="uadb.json"):
180202

181203
@property
182204
def browser_family_counts(self):
183-
browser_family_stats = defaultdict(int)
184-
for id, ddb in self.items():
185-
for browser_family, count_val in ddb.browser_family_counts.items():
186-
browser_family_stats[browser_family] += count_val
187-
return browser_family_stats
188-
189-
@property
190-
def browser_family_counts_by_id(self):
191-
browser_family_stats = defaultdict(lambda: defaultdict(int))
192-
for id, ddb in self.items():
193-
for browser_family, count in ddb.browser_family_counts_by_remote_addr.items():
194-
for remote_addr, count_val in count.items():
195-
browser_family_stats[id][browser_family] += count_val
196-
return browser_family_stats
197-
198-
@property
199-
def browser_family_counts_by_remote_addr(self):
200-
browser_family_stats = defaultdict(lambda: defaultdict(int))
201-
for id, ddb in self.items():
202-
for browser_family, count in ddb.browser_family_counts_by_remote_addr.items():
203-
for remote_addr, count_val in count.items():
204-
browser_family_stats[browser_family][remote_addr] += count_val
205-
return browser_family_stats
206-
207-
@property
208-
def browser_family_counts_by_id_by_remote_addr(self):
209-
browser_family_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
210-
for id, ddb in self.items():
211-
for browser_family, count in ddb.browser_family_counts_by_remote_addr.items():
212-
for remote_addr, count_val in count.items():
213-
browser_family_stats[id][browser_family][remote_addr] += count_val
214-
return browser_family_stats
215-
205+
return self._get_counts("browser_family_counts")
216206

217207
@property
218208
def os_family_counts(self):
219-
os_family_stats = defaultdict(int)
220-
for id, ddb in self.items():
221-
for os_family, count_val in ddb.os_family_counts.items():
222-
os_family_stats[os_family] += count_val
223-
return os_family_stats
224-
225-
@property
226-
def os_family_counts_by_id(self):
227-
os_family_stats = defaultdict(lambda: defaultdict(int))
228-
for id, ddb in self.items():
229-
for os_family, count in ddb.os_family_counts_by_remote_addr.items():
230-
for remote_addr, count_val in count.items():
231-
os_family_stats[id][os_family] += count_val
232-
return os_family_stats
233-
234-
# @property # old way
235-
# def os_family_counts_by_remote_addr(self):
236-
# os_family_stats = defaultdict(lambda: defaultdict(int))
237-
# for id, ddb in self.items():
238-
# for os_family, count in ddb.os_family_counts_by_remote_addr.items():
239-
# for remote_addr, count_val in count.items():
240-
# os_family_stats[os_family][remote_addr] += count_val
241-
# return os_family_stats
242-
243-
@property # New way with method re-use
244-
def os_family_counts_by_remote_addr(self):
245-
detailed_os_family_stats = self.os_family_counts_by_id_by_remote_addr
246-
os_family_stats = defaultdict(lambda: defaultdict(int))
247-
for id, remote_addr_stats in detailed_os_family_stats.items():
248-
for remote_addr, os_family in remote_addr_stats.items():
249-
for os_family, count_val in os_family.items():
250-
os_family_stats[remote_addr][os_family] += count_val
251-
return os_family_stats
252-
253-
@property
254-
def os_family_counts_by_id_by_remote_addr(self):
255-
os_family_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
256-
for id, ddb in self.items():
257-
for os_family, count in ddb.os_family_counts_by_remote_addr.items():
258-
for remote_addr, count_val in count.items():
259-
os_family_stats[id][os_family][remote_addr] += count_val
260-
return os_family_stats
209+
return self._get_counts("os_family_counts")
261210

262211
@property
263212
def referrer_counts(self):
264-
referrers_stats = defaultdict(int)
265-
for id, ddb in self.items():
266-
for referrer, count_val in ddb.referrer_counts.items():
267-
referrers_stats[referrer] += count_val
268-
return referrers_stats
213+
return self._get_counts("referrer_counts")
269214

270-
@property
271-
def referrer_counts_by_id(self):
272-
referrer_stats = defaultdict(lambda: defaultdict(int))
215+
def _get_counts(self, property):
216+
stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
273217
for id, ddb in self.items():
274-
for referrer, count in ddb.referrer_counts_by_remote_addr.items():
275-
for remote_addr, count_val in count.items():
276-
referrer_stats[id][remote_addr] += count_val
277-
return referrer_stats
278-
279-
@property
280-
def referrer_counts_by_remote_addr(self):
281-
referrer_stats = defaultdict(lambda: defaultdict(int))
282-
for id, ddb in self.items():
283-
for referrer, count in ddb.referrer_counts_by_remote_addr.items():
284-
for remote_addr, count_val in count.items():
285-
referrer_stats[referrer][remote_addr] += count_val
286-
return referrer_stats
287-
288-
@property
289-
def referrer_counts_by_id_by_remote_addr(self):
290-
referrer_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
291-
for id, ddb in self.items():
292-
for referrer, count in ddb.referrer_counts_by_remote_addr.items():
293-
for remote_addr, count_val in count.items():
294-
referrer_stats[id][referrer][remote_addr] += count_val
295-
return referrer_stats
218+
for referrer, count in getattr(ddb, property).items():
219+
for value, _count in count.items():
220+
stats[id][referrer][value] += _count
221+
return stats
296222

297223

298224
class _DDB(dict):
299225
def __init__(self, d={}, max_size=10000):
300-
super().__init__(d)
226+
for k, v in d.items():
227+
self[k] = v
301228
self._max_size = max_size
302229

303230
def __add__(self, user_data):
@@ -308,63 +235,31 @@ def __add__(self, user_data):
308235
self._cleanup()
309236
return self
310237

311-
def _get_user_agent(self, timestamp):
312-
a = parse(self[timestamp]['user_agent'])
313-
return a
314-
315238
def _cleanup(self):
316-
now = time()
317239
while len(self) > self._max_size:
318240
# it's silly that we have to cast to list here, but dict_keys is not subscriptable
319241
del self[list(self.keys())[0]]
320242

321243
@property
322244
def browser_family_counts(self):
323-
browser_family = defaultdict(int)
324-
for timestamp in self.keys():
325-
browser = self._get_user_agent(timestamp)
326-
browser_family[browser.user_agent.family if browser else "Unknown"] += 1
327-
return browser_family
328-
329-
@property
330-
def browser_family_counts_by_remote_addr(self):
331-
browser_family = defaultdict(lambda: defaultdict(int))
332-
for timestamp in self.keys():
333-
browser = self._get_user_agent(timestamp)
334-
remote_addr = self[timestamp]['remote_addr']
335-
browser_family[remote_addr][browser.user_agent.family if browser else "Unknown"] += 1
336-
return browser_family
245+
return self._get_counts("family", parents=["user_agent", "user_agent"])
337246

338247
@property
339248
def os_family_counts(self):
340-
os_family = defaultdict(int)
341-
for timestamp in self.keys():
342-
_os = self._get_user_agent(timestamp).os # '_os' has leading underscore to avoid conflicts with the 'os' module
343-
os_family[_os.family if _os else "Unknown"] += 1
344-
return os_family
345-
346-
@property
347-
def os_family_counts_by_remote_addr(self):
348-
os_family = defaultdict(lambda: defaultdict(int))
349-
for timestamp in self.keys():
350-
_os = self._get_user_agent(timestamp).os # '_os' has leading underscore to avoid conflicts with the 'os' module
351-
remote_addr = self[timestamp]['remote_addr']
352-
os_family[remote_addr][_os.family if _os else "Unknown"] += 1
353-
return os_family
249+
return self._get_counts("family", parents=["user_agent", "os"])
354250

355251
@property
356252
def referrer_counts(self):
357-
referrers = defaultdict(int)
358-
for timestamp in self.keys():
359-
referrer = self[timestamp]['referrer']
360-
referrers[referrer if referrer else "Unknown"] += 1
361-
return referrers
253+
return self._get_counts("referrer")
362254

363-
@property
364-
def referrer_counts_by_remote_addr(self):
365-
referrers = defaultdict(lambda: defaultdict(int))
255+
def _get_counts(self, property, parents=[]):
256+
return_data = defaultdict(lambda: defaultdict(int))
366257
for timestamp in self.keys():
367-
referrer = self[timestamp]['referrer']
368-
remote_addr = self[timestamp]['remote_addr']
369-
referrers[remote_addr][referrer if referrer else "Unknown"] += 1
370-
return referrers
258+
reference_object = self[timestamp]
259+
for parent in parents:
260+
reference_object = reference_object.get(parent) or "Unknown"
261+
262+
key = reference_object.get(property) or "Unknown"
263+
remote_addr = self[timestamp]["remote_addr"]
264+
return_data[remote_addr][key] += 1
265+
return return_data

0 commit comments

Comments
 (0)