@@ -2757,61 +2757,85 @@ def api_set_crawler_max_captures(data):
27572757## TEST ##
27582758
def is_test_ail_crawlers_successful():
    """Return True when the last crawler self-test succeeded for at least
    one of the two targets (web or onion).

    Success flags are stored in Redis as the string 'True'.
    """
    flags = (
        r_db.hget('crawler:tor:test', field)
        for field in ('web_success', 'onion_success')
    )
    return any(flag == 'True' for flag in flags)
27612763
def get_test_ail_crawlers_message():
    """Build a two-line human-readable summary of the last crawler test,
    one line per target (web, then onion)."""
    meta = get_test_ail_crawlers_metadata()
    web_line = f"Web: {meta['web_message']}"
    onion_line = f"Onion: {meta['onion_message']}"
    return f"{web_line}\n{onion_line}"
def get_test_ail_crawlers_metadata():
    """Return the stored outcome of the last crawler self-test.

    Fetches the whole ``crawler:tor:test`` hash in a single HGETALL round
    trip (instead of five HGETs) and substitutes a human-readable default
    for every field that was never set or is empty.

    :return: dict with string values for the keys ``web_success``,
             ``web_message``, ``onion_success``, ``onion_message``
             and ``date_test``.
    """
    # One round trip; HGETALL returns {} when the hash does not exist.
    stored = r_db.hgetall('crawler:tor:test')
    defaults = {
        'web_success': 'False',
        'web_message': 'Web crawler test has not been run yet.',
        'onion_success': 'False',
        'onion_message': 'Onion crawler test has not been run yet.',
        'date_test': 'Unknown',
    }
    # Treat missing/empty values uniformly as "unset" (the previous code
    # mixed `is None` checks for flags with falsy checks for messages).
    metadata = {}
    for field, default in defaults.items():
        value = stored.get(field)
        metadata[field] = value if value else default
    return metadata
def save_test_ail_crawlers_result(web_success, web_message, onion_success, onion_message, date_test):
    """Persist the outcome of the crawler self-test for both targets.

    :param web_success: bool-like result of the clear-web capture test
    :param web_message: human-readable detail for the web test
    :param onion_success: bool-like result of the onion capture test
    :param onion_message: human-readable detail for the onion test
    :param date_test: timestamp string of when the test ran
    """
    # Single HSET with a mapping (redis-py >= 3.5): one round trip
    # instead of five consecutive HSET calls.
    r_db.hset('crawler:tor:test', mapping={
        'web_success': str(web_success),
        'web_message': web_message,
        'onion_success': str(onion_success),
        'onion_message': onion_message,
        'date_test': date_test,
    })
def _run_lacus_network_test(lacus, user_agent, url, expected_text, proxy=None):
    """Enqueue one capture on Lacus and check its HTML for *expected_text*.

    Polls the capture status for up to 90 seconds, then inspects the
    capture result.

    :param lacus: Lacus/PyLacus client instance
    :param user_agent: user-agent string to send with the capture
    :param url: URL to capture
    :param expected_text: substring that must appear in the captured HTML
    :param proxy: optional proxy name (e.g. 'force_tor' for onion targets)
    :return: tuple ``(success: bool, message: str)``
    """
    enqueue_kwargs = {'url': url, 'depth': 0, 'user_agent': user_agent,
                      'force': True, 'general_timeout_in_sec': 90}
    if proxy:
        enqueue_kwargs['proxy'] = proxy
    capture_uuid = lacus.enqueue(**enqueue_kwargs)
    # Fix: start the timeout clock BEFORE the first status poll, so the
    # 90-second budget covers the entire wait (the previous code polled
    # once before taking the start time).
    launch_time = int(time.time())
    status = lacus.get_capture_status(capture_uuid)
    while int(time.time()) - launch_time < 90 and status != CaptureStatus.DONE:
        time.sleep(1)
        status = lacus.get_capture_status(capture_uuid)
    entries = lacus.get_capture(capture_uuid)
    if 'error' in entries:
        return False, entries['error']
    if entries.get('html'):
        if expected_text in entries['html']:
            return True, f'Expected content "{expected_text}" found.'
        return False, f'Expected content "{expected_text}" not found.'
    # Fix: magic number 2 replaced with the enum it stood for
    # (CaptureStatus.ONGOING = 2 in pylacus): the capture never finished.
    if status == CaptureStatus.ONGOING:
        return False, 'Timeout Error'
    return False, 'Error'
27682815
def test_ail_crawlers():
    """Run the crawler self-test against a clear-web and an onion target,
    persist both results, and return True if at least one target succeeded."""
    date_test = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Lacus must be reachable before any capture can be attempted.
    if not ping_lacus():
        lacus_url = get_lacus_url()
        error_message = f'Error: Can\'t connect to AIL Lacus, {lacus_url}'
        print(error_message)
        save_test_ail_crawlers_result(False, error_message, False, error_message, date_test)
        return False

    lacus = get_lacus()
    user_agent = f'{git_status.get_last_commit_id_from_local()}-AIL LACUS CRAWLER'

    # (url, expected content, proxy) for each target: clear web, then onion.
    targets = [
        ('https://ail-project.org/', 'AIL Project', None),
        ('http://eswpccgr5xyovsahffkehgleqthrasfpfdblwbs4lstd345dwq5qumqd.onion',
         'It works!', 'force_tor'),
    ]
    outcomes = [
        _run_lacus_network_test(lacus, user_agent, url, expected, proxy=proxy)
        for url, expected, proxy in targets
    ]
    (web_success, web_message), (onion_success, onion_message) = outcomes

    save_test_ail_crawlers_result(web_success, web_message, onion_success, onion_message, date_test)
    return web_success or onion_success
28152839
28162840#### ---- ####
28172841
0 commit comments