22
33from constfig import C
44from flask import request
5- from ua_parser import parse
65from time import time , sleep
7- from functools import lru_cache
86from collections import defaultdict
97
## Shape of the data that gets saved to disk:
# {
#     "test": {
#         "1751850528.990112": {
#             "content_type": null,
#             "headers": {
#                 "Host": "localhost:5000",
#                 "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:140.0) Gecko/20100101 Firefox/140.0",
#                 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
#                 "Accept-Language": "en-US,en;q=0.5",
#                 "Accept-Encoding": "gzip, deflate, br, zstd",
#                 "Connection": "keep-alive",
#                 "Cookie": "_ssss=2|88888...aaaaaa",
#                 "Upgrade-Insecure-Requests": "1",
#                 "Sec-Fetch-Dest": "document",
#                 "Sec-Fetch-Mode": "navigate",
#                 ...  (remaining headers omitted)
#             },
#             "referrer": null,
#             "remote_addr": "127.0.0.1",
#             "user_agent": {
#                 "device": {
#                     "brand": "Apple",
#                     "family": "Mac",
#                     "model": "Mac"
#                 },
#                 "os": {
#                     "family": "Mac OS X",
#                     "major": "10",
#                     "minor": "15",
#                     "patch": null,
#                     "patch_minor": null
#                 },
#                 "user_agent": {
#                     "family": "Firefox",
#                     "major": "140",
#                     "minor": "0",
#                     "patch": null,
#                     "patch_minor": null
#                 },
#                 "string": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:140.0) Gecko/20100101 Firefox/140.0"
#             }
#         }
#     }
# }
115134# ddb: dumb database
116135class DDB (dict ):
def __init__(self, d=None, max_size=10000):
    """Create the top-level store, seeded from ``d`` or from disk.

    Args:
        d: Optional mapping of ID -> dict of records. When non-empty, it is
            used instead of the on-disk data and every value is wrapped in
            a ``_DDB``. ``None`` or an empty mapping triggers ``self.load()``.
        max_size: Size limit handed to every ``_DDB`` created here and kept
            on the instance as ``_max_size``.

    Raises:
        TypeError: if any value in ``d`` is not a dict. Entries that were
            processed before the offending one stay stored.
    """
    self._max_size = max_size

    if not d:
        # Nothing was passed in, so fall back to whatever load() provides.
        self.load()
        return

    # The user passed in data, so assume we should use that instead of
    # data from disk.
    for k, v in d.items():
        if not isinstance(v, dict):
            raise TypeError(f"Invalid type for value {v} in dictionary. Expected dict, got {type(v)}")
        self[k] = _DDB(v, max_size=max_size)
def _get_id(self):
    """Return the ``id`` query-string argument of the current Flask request.

    Centralizes the lookup used by ``register``, ``unregister`` and
    ``__call__``. NOTE(review): ``request.args.get`` presumably yields
    ``None`` when the parameter is absent, and callers do not guard
    against that -- confirm this is intended.
    """
    return request.args.get("id")
129151
def register(self):
    """Register the current request's ID with a fresh, empty ``_DDB``.

    Raises:
        KeyError: if the ID is already present in this store.
    """
    request_id = self._get_id()  # renamed from `id` to stop shadowing the builtin
    if request_id in self:
        raise KeyError(f"ID {request_id} already registered")
    # Goes straight to dict.__setitem__, bypassing any __setitem__ this
    # class may define.
    super().__setitem__(request_id, _DDB(max_size=self._max_size))
135157
def unregister(self):
    """Remove the current request's ID from the store.

    Returns:
        The value that was stored under the ID, or ``None`` when the ID was
        not registered.
    """
    # pop() with a default covers the "not registered" case in one lookup.
    return self.pop(self._get_id(), None)
140162
141163 def __call__ (self ):
142- id = request . args . get ( "id" )
164+ id = self . _get_id ( )
143165 data_set = {}
144166 for flask_request_key , serializer in C .FLASK_REQUEST_SERIALIZERS .items ():
145167 data_set [flask_request_key ] = serializer (getattr (request , flask_request_key , None ))
@@ -180,124 +202,29 @@ def load(self, filename="uadb.json"):
180202
@property
def browser_family_counts(self):
    """Browser-family counts gathered from every registered ID.

    Delegates to ``self._get_counts`` with the name of the per-ID
    attribute to read.
    """
    return self._get_counts("browser_family_counts")
216206
@property
def os_family_counts(self):
    """OS-family counts gathered from every registered ID.

    Delegates to ``self._get_counts`` with the name of the per-ID
    attribute to read.
    """
    return self._get_counts("os_family_counts")
261210
@property
def referrer_counts(self):
    """Referrer counts gathered from every registered ID.

    Delegates to ``self._get_counts`` with the name of the per-ID
    attribute to read.
    """
    return self._get_counts("referrer_counts")
269214
def _get_counts(self, property):
    """Collect one counts attribute from every stored child, keyed by ID.

    Args:
        property: Name of the attribute to read from each child (for
            example ``"referrer_counts"``). The attribute must be a
            two-level mapping ``{outer: {inner: count}}``; for ``_DDB``
            children the outer key is the remote address.

    Returns:
        A nested ``defaultdict`` shaped ``{id: {outer: {inner: count}}}``.
    """
    stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for entry_id, child in self.items():
        for outer_key, inner_counts in getattr(child, property).items():
            for inner_key, count in inner_counts.items():
                stats[entry_id][outer_key][inner_key] += count
    return stats
296222
297223
298224class _DDB (dict ):
def __init__(self, d=None, max_size=10000):
    """Create a per-ID record store, optionally seeded from ``d``.

    Args:
        d: Optional mapping of initial entries, copied key by key through
            ``self[k] = v``. ``None`` means start empty.
        max_size: Entry limit stored as ``_max_size`` (read by ``_cleanup``).
    """
    # A None default avoids sharing one mutable dict across all calls.
    if d is not None:
        for k, v in d.items():
            self[k] = v
    self._max_size = max_size
302229
303230 def __add__ (self , user_data ):
@@ -308,63 +235,31 @@ def __add__(self, user_data):
308235 self ._cleanup ()
309236 return self
310237
311- def _get_user_agent (self , timestamp ):
312- a = parse (self [timestamp ]['user_agent' ])
313- return a
314-
def _cleanup(self):
    """Drop the oldest-inserted entries until at most ``_max_size`` remain."""
    while len(self) > self._max_size:
        # dict_keys is not subscriptable, but the first key yielded by
        # iteration is the oldest insertion, so no list copy is needed.
        del self[next(iter(self))]
320242
@property
def browser_family_counts(self):
    """Browser-family counts for this ID.

    Reads ``record["user_agent"]["user_agent"]["family"]`` for each record
    by delegating to ``self._get_counts``.
    """
    return self._get_counts("family", parents=["user_agent", "user_agent"])
337246
@property
def os_family_counts(self):
    """OS-family counts for this ID.

    Reads ``record["user_agent"]["os"]["family"]`` for each record by
    delegating to ``self._get_counts``.
    """
    return self._get_counts("family", parents=["user_agent", "os"])
354250
@property
def referrer_counts(self):
    """Referrer counts for this ID.

    Reads the top-level ``"referrer"`` field of each record by delegating
    to ``self._get_counts``.
    """
    return self._get_counts("referrer")
362254
def _get_counts(self, property, parents=()):
    """Count the values of one record field, grouped by remote address.

    Args:
        property: Key to read from each record, or from the nested dict
            reached by following ``parents``.
        parents: Sequence of keys to descend through before reading
            ``property`` (for example ``["user_agent", "os"]``).

    Returns:
        A nested ``defaultdict`` shaped ``{remote_addr: {value: count}}``.
        A missing, falsy or unreachable value is counted as ``"Unknown"``.

    Raises:
        KeyError: if a record has no ``"remote_addr"`` entry.
    """
    return_data = defaultdict(lambda: defaultdict(int))
    for timestamp in self:
        record = self[timestamp]

        # Descend through the parents. A level that is absent or not a dict
        # (e.g. a null or raw-string user_agent) ends the walk with None
        # rather than calling .get() on a non-mapping.
        node = record
        for parent in parents:
            node = node.get(parent) if isinstance(node, dict) else None

        value = node.get(property) if isinstance(node, dict) else None
        key = value or "Unknown"
        return_data[record["remote_addr"]][key] += 1
    return return_data
0 commit comments