11"""webcache module"""
22
33import traceback
4+ from threading import Semaphore
45import json
56import os
67from os .path import join
@@ -37,25 +38,33 @@ async def get_from_file(fname,dont_read=False):
3738class WebCache :
3839 """WebCache class"""
def __init__(self, **kwargs):
    """Store the keyword options, fill in defaults and prepare the cache directory.

    All keyword arguments are kept in ``self.settings``; missing options
    receive the defaults listed below.  An optional ``auth`` pair is turned
    into a ``BasicAuth`` object by calling ``gets()`` on both of its items.
    """
    self.settings = kwargs

    credentials = kwargs.get("auth", None)
    if credentials is not None:
        self.auth = BasicAuth(credentials[0].gets(), credentials[1].gets())
    else:
        self.auth = None

    # Defaults are applied in a fixed order so the printed settings stay stable.
    option_defaults = (
        ("verbose", False),
        ("webcache_dir", "web-cache"),
        ("dont_read_files", False),
        ("dont_return_data", False),
        ("return_only_cache_files", False),
        ("dont_cache_data", False),
        ("max_requests", 1000),
    )
    for option, fallback in option_defaults:
        self.settings.setdefault(option, fallback)

    if self.settings['verbose']:
        print("WebCaches settings:", self.settings)

    # Counting semaphore sized by the ``max_requests`` option.
    self.request_lock = Semaphore(self.settings['max_requests'])
    cache_root = Path(self.settings['webcache_dir'])
    cache_root.mkdir(parents=True, exist_ok=True)
59+
def clean(self):
    """Delete the cache directory tree and recreate it empty.

    A failure to remove the tree (``OSError``) is reported on stdout with
    the traceback; the directory is (re)created in either case.
    """
    cache_dir = self.settings['webcache_dir']
    try:
        shutil.rmtree(cache_dir)
    except OSError:
        details = str(traceback.format_exc())
        print("Error at TUCache clean:\n " + details)
    Path(cache_dir).mkdir(parents=True, exist_ok=True)
67+
5968 async def get_from_web (self ,url : str ) -> dict :
6069 """get_from_web method"""
6170 async with aiohttp .ClientSession () as session :
@@ -64,25 +73,27 @@ async def get_from_web(self,url: str) -> dict:
6473 _j = await response .json ()
6574 _t = await response .text ()
6675 return _j , _t
76+
async def get(self, url):
    """Return the JSON data for ``url``, going through the file cache.

    Behaviour is driven by ``self.settings``:

    * ``dont_cache_data`` -- fetch from the web and return the data without
      touching the cache directory.
    * ``return_only_cache_files`` -- return only the cache file name
      (md5 of the URL plus ``.json``), without reading or fetching anything.
    * ``dont_read_files`` -- forwarded to ``get_from_file`` as its second
      argument.
    * ``dont_return_data`` -- return ``{}`` instead of the data.
    * ``verbose`` -- print the URL, a length and ``"W"`` (written from the
      web) or ``"C"`` (served by ``get_from_file``).

    When ``get_from_file`` yields ``None`` the URL is fetched with
    ``get_from_web`` and the response text is written to the cache file.
    """
    # Local import keeps this method self-contained.
    import asyncio

    settings = self.settings
    if settings['dont_cache_data']:
        data, _ = await self.get_from_web(url)
        return data

    fname = md5(url.encode('utf-8')).hexdigest() + ".json"
    if settings['return_only_cache_files']:
        return fname

    fname = join(settings['webcache_dir'], fname)
    data = await get_from_file(fname, settings['dont_read_files'])
    if data is None:
        # request_lock is a threading.Semaphore.  Blocking on it inside a
        # coroutine would freeze the event loop, and the coroutines holding
        # permits could then never run to release them.  So poll without
        # blocking and yield to the loop while no permit is free.
        while not self.request_lock.acquire(blocking=False):
            await asyncio.sleep(0.01)
        try:
            data, text = await self.get_from_web(url)
        finally:
            self.request_lock.release()
        async with aiofiles.open(fname, mode='w+', encoding='utf8') as _fp:
            await _fp.write(text)
        if settings['verbose']:
            print(url, len(text), "W")
    elif settings['verbose']:
        print(url, len(str(data)), "C")

    if settings['dont_return_data']:
        return {}
    return data
0 commit comments