Skip to content

Commit d684dc5

Browse files
committed
added a throttle limiter with default values for trackunit
1 parent e861930 commit d684dc5

1 file changed

Lines changed: 44 additions & 13 deletions

File tree

pytrackunit/webcache.py

Lines changed: 44 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
"""webcache module"""
22

33
import traceback
4-
from threading import Semaphore
4+
import time
55
import json
66
import os
7+
import asyncio
78
from os.path import join
89
from hashlib import md5
910
from pathlib import Path
@@ -50,11 +51,16 @@ def __init__(self,**kwargs):
5051
self.settings.setdefault("dont_return_data",False)
5152
self.settings.setdefault("return_only_cache_files",False)
5253
self.settings.setdefault("dont_cache_data",False)
53-
self.settings.setdefault("max_requests",1000)
54+
self.settings.setdefault("max_requests",40)
55+
self.settings.setdefault("throttle_period",1)
56+
self.settings.setdefault("throttle_limit",40)
5457
if self.settings['verbose']:
5558
print("WebCaches settings:",self.settings)
5659

57-
self.request_lock = Semaphore(self.settings['max_requests'])
60+
self.request_lock = asyncio.Semaphore(self.settings['max_requests'])
61+
self.num_requests = 0
62+
self.next_reset_at = 0
63+
5864
Path(self.settings['webcache_dir']).mkdir(parents=True, exist_ok=True)
5965

6066
def clean(self):
@@ -67,33 +73,58 @@ def clean(self):
6773

6874
async def get_from_web(self,url: str) -> tuple:
    """Fetch ``url`` with a GET request while honouring the throttle settings.

    No more than ``settings['throttle_limit']`` requests are started within
    one ``settings['throttle_period']``-second window, and the request
    itself runs while holding ``self.request_lock``.

    Args:
        url: Address to request; ``self.auth`` is passed as the auth argument.

    Returns:
        A ``(parsed_json, raw_text)`` tuple built from the response body.

    Raises:
        Whatever ``response.raise_for_status()`` raises when the server
        answers with an error status.
    """
    while True:
        now = time.time()

        # Open a fresh throttle window once the previous one has elapsed.
        if now > self.next_reset_at:
            self.num_requests = 0
            self.next_reset_at = now + self.settings['throttle_period']

        if self.num_requests < self.settings['throttle_limit']:
            break

        # The window is full. Sleep for the time that is left in it rather
        # than yielding with sleep(0), which would re-run this loop as fast
        # as the event loop allows and burn CPU until the window expires.
        await asyncio.sleep(max(self.next_reset_at - now, 0))

    # Count the request against the current window before it is started.
    self.num_requests += 1

    async with self.request_lock:
        async with aiohttp.ClientSession() as session:
            async with session.request('GET', url,auth=self.auth) as response:
                response.raise_for_status()
                _j = await response.json()
                _t = await response.text()
                return _j , _t
76101

77102
async def get(self,url):
    """Return the data for ``url``, using the file cache where configured.

    Behaviour is driven by ``self.settings``:

    * ``dont_cache_data``: skip the cache entirely and return the freshly
      downloaded data.
    * ``return_only_cache_files``: return only the cache file name (the MD5
      hex digest of the URL plus ``.json``), without touching disk or web.
    * otherwise: ask ``get_from_file`` for the cached copy; when it yields
      ``None`` the URL is downloaded and the response text is written to
      the cache file inside ``webcache_dir``.
    * ``dont_return_data``: return an empty dict instead of the data.
    * ``verbose``: print the URL, a length and ``"W"`` (written) or ``"C"``
      (cached) for each lookup.
    """
    settings = self.settings

    # Caching disabled: go straight to the web and hand back the result.
    if settings['dont_cache_data']:
        fresh, _ = await self.get_from_web(url)
        return fresh

    chatty = settings['verbose']
    cache_name = md5(url.encode('utf-8')).hexdigest()+".json"

    # Caller only wants to know which cache file belongs to this URL.
    if settings['return_only_cache_files']:
        return cache_name

    cache_path = join(settings['webcache_dir'],cache_name)
    data = await get_from_file(cache_path,settings['dont_read_files'])

    if data is not None:
        if chatty:
            print(url,len(str(data)),"C")
    else:
        # Nothing usable came back from the cache: download and store it.
        data, body = await self.get_from_web(url)
        async with aiofiles.open(cache_path, mode='w+',encoding='utf8') as handle:
            await handle.write(body)
        if chatty:
            print(url,len(body),"W")

    return {} if settings['dont_return_data'] else data

0 commit comments

Comments
 (0)