11"""webcache module"""
22
33import traceback
4- from threading import Semaphore
4+ import time
55import json
66import os
7+ import asyncio
78from os .path import join
89from hashlib import md5
910from pathlib import Path
@@ -50,11 +51,16 @@ def __init__(self,**kwargs):
5051 self .settings .setdefault ("dont_return_data" ,False )
5152 self .settings .setdefault ("return_only_cache_files" ,False )
5253 self .settings .setdefault ("dont_cache_data" ,False )
53- self .settings .setdefault ("max_requests" ,1000 )
54+ self .settings .setdefault ("max_requests" ,40 )
55+ self .settings .setdefault ("throttle_period" ,1 )
56+ self .settings .setdefault ("throttle_limit" ,40 )
5457 if self .settings ['verbose' ]:
5558 print ("WebCaches settings:" ,self .settings )
5659
57- self .request_lock = Semaphore (self .settings ['max_requests' ])
60+ self .request_lock = asyncio .Semaphore (self .settings ['max_requests' ])
61+ self .num_requests = 0
62+ self .next_reset_at = 0
63+
5864 Path (self .settings ['webcache_dir' ]).mkdir (parents = True , exist_ok = True )
5965
6066 def clean (self ):
@@ -67,33 +73,58 @@ def clean(self):
6773
async def get_from_web(self, url: str) -> tuple:
    """Fetch *url* with an HTTP GET, subject to throttling and a concurrency cap.

    At most ``settings['throttle_limit']`` requests are allowed to start in
    each window of ``settings['throttle_period']`` seconds; the window state
    lives in ``self.num_requests`` / ``self.next_reset_at``. Independently,
    ``self.request_lock`` bounds how many requests are in flight at once.

    Args:
        url: Address to request.

    Returns:
        A ``(data, text)`` pair: the response body decoded as JSON and the
        same body as text.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status,
        plus any connection or JSON-decoding error from aiohttp.
    """
    while True:
        now = time.time()

        # Open a fresh window once the current one has expired.
        if now > self.next_reset_at:
            self.num_requests = 0
            self.next_reset_at = now + self.settings['throttle_period']

        if self.num_requests < self.settings['throttle_limit']:
            break

        # The window is full. Sleep until it expires rather than yielding
        # with sleep(0), which would spin the event loop at full CPU for
        # the rest of the period.
        await asyncio.sleep(max(self.next_reset_at - now, 0))

    # Claim a slot in the current window before starting the request.
    self.num_requests += 1

    async with self.request_lock:
        async with aiohttp.ClientSession() as session:
            async with session.request('GET', url, auth=self.auth) as response:
                response.raise_for_status()
                _j = await response.json()
                _t = await response.text()
                return _j, _t
76101
async def get(self, url):
    """Return the JSON data for *url*, using the on-disk cache when possible.

    Behaviour is driven by ``self.settings``:

    * ``dont_cache_data``: bypass the cache entirely and return the data
      fetched by ``get_from_web``.
    * ``return_only_cache_files``: return only the cache file name
      (``md5(url) + ".json"``, without the directory) and do no I/O.
    * otherwise: read the cache file under ``webcache_dir``; when
      ``get_from_file`` yields ``None``, fetch from the web and write the
      response text to that file.
    * ``dont_return_data``: after the cache has been consulted or filled,
      return ``{}`` instead of the data.
    * ``verbose``: print the URL, payload length and ``"W"`` (written from
      the web) or ``"C"`` (served from cache).

    Args:
        url: Address whose data is wanted; its MD5 hex digest names the
            cache file.

    Returns:
        The decoded data, a cache file name, or ``{}`` as described above.
    """
    if self.settings['dont_cache_data']:
        data, _ = await self.get_from_web(url)
        return data

    verbose = self.settings['verbose']

    fname = md5(url.encode('utf-8')).hexdigest() + ".json"

    if self.settings['return_only_cache_files']:
        return fname

    fname = join(self.settings['webcache_dir'], fname)
    data = await get_from_file(fname, self.settings['dont_read_files'])
    if data is None:
        # Cache miss: get_from_web applies the throttle and the request
        # semaphore itself, so no extra locking is needed here.
        data, text = await self.get_from_web(url)
        async with aiofiles.open(fname, mode='w+', encoding='utf8') as _fp:
            await _fp.write(text)
        if verbose:
            print(url, len(text), "W")
    else:
        if verbose:
            print(url, len(str(data)), "C")

    if self.settings['dont_return_data']:
        return {}

    return data
0 commit comments