Skip to content

Commit b9c8c52

Browse files
committed
chg: [crawler settings] display recent crawler logs in settings page
1 parent 37f4141 commit b9c8c52

3 files changed

Lines changed: 38 additions & 1 deletion

File tree

bin/lib/crawlers.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import sys
1717
import time
1818
import uuid
19+
from collections import deque
1920

2021
from multiprocessing import Process as Proc
2122

@@ -188,6 +189,23 @@ def get_date_crawled_items_source(date):
188189
def get_har_dir():
189190
return HAR_DIR
190191

192+
193+
def get_last_crawler_logs(lines=100):
    """Return the trailing lines of the crawler log file.

    The log is read from ``$AIL_HOME/logs/crawlers.log``. Bytes that are not
    valid UTF-8 are replaced instead of raising, so a corrupted log entry
    cannot break the caller.

    :param lines: maximum number of trailing log lines to return.
    :return: list of log lines with the trailing newline stripped. When the
        file is missing or cannot be read, a one-item list holding
        ``'No crawler logs available.'``; when the file has no content, a
        one-item list holding ``'Crawler log file is empty.'``.
    :raises KeyError: if the ``AIL_HOME`` environment variable is not set.
    :raises ValueError: if ``lines`` is negative (rejected by ``deque``).
    """
    log_path = os.path.join(os.environ['AIL_HOME'], 'logs', 'crawlers.log')
    # Open the file directly rather than probing it with exists()/getsize()
    # first: the log can be rotated or removed between a check and the open,
    # and an OSError raised by such a pre-check would escape to the caller.
    # A missing file surfaces here as FileNotFoundError (an OSError) and an
    # empty file simply yields an empty deque, so both cases stay covered.
    try:
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            # The bounded deque streams the file and keeps only the newest
            # `lines` entries, so memory use does not grow with the log size.
            last_lines = deque(f, maxlen=lines)
    except OSError:
        return ['No crawler logs available.']

    if not last_lines:
        return ['Crawler log file is empty.']
    return [line.rstrip('\n') for line in last_lines]
208+
191209
def is_valid_onion_v3_domain(domain):
192210
if len(domain) == 62: # v3 address
193211
return domain[:56].isalnum()

var/www/blueprints/crawler_splash.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def create_json_response(data, status_code):
6161
return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code
6262

6363

64+
6465
# ============= ROUTES ==============
6566
@crawler_splash.route("/crawlers/dashboard", methods=['GET'])
6667
@login_required
@@ -1062,6 +1063,7 @@ def crawler_settings():
10621063

10631064
is_onion_filter_enabled = crawlers.is_onion_filter_enabled(cache=False)
10641065
is_onion_filter_unknown = crawlers.is_onion_filter_unknown(cache=False)
1066+
crawler_logs = crawlers.get_last_crawler_logs(lines=100)
10651067

10661068
# TODO REGISTER PROXY
10671069
# all_proxies = crawlers.get_all_proxies_metadata()
@@ -1076,7 +1078,8 @@ def crawler_settings():
10761078
is_crawler_working=is_crawler_working,
10771079
crawler_error_mess=crawler_error_mess,
10781080
is_onion_filter_enabled=is_onion_filter_enabled,
1079-
is_onion_filter_unknown=is_onion_filter_unknown
1081+
is_onion_filter_unknown=is_onion_filter_unknown,
1082+
crawler_logs=crawler_logs
10801083
)
10811084

10821085

var/www/templates/crawler/crawler_splash/settings_crawler.html

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@
5656
max-height: 260px;
5757
overflow-y: auto;
5858
}
59+
.crawler-logs-output {
60+
border-radius: .5rem;
61+
max-height: 360px;
62+
overflow-y: auto;
63+
white-space: pre;
64+
}
5965
</style>
6066
</head>
6167

@@ -225,6 +231,16 @@ <h5 class="card-title d-flex justify-content-between align-items-center flex-wra
225231
</div>
226232
</div>
227233

234+
235+
<div class="card border-secondary my-4">
236+
<div class="card-body text-dark">
237+
<h5 class="card-title mb-1">Recent crawler logs</h5>
238+
<p class="text-muted mb-3">Recent crawler errors and status messages from <code>logs/crawlers.log</code> (last 100 lines).</p>
239+
<pre class="bg-dark text-white p-3 crawler-logs-output">{% for line in crawler_logs %}{{ line }}
240+
{% endfor %}</pre>
241+
</div>
242+
</div>
243+
228244
<a href="{{ url_for('crawler_splash.crawler_blacklist') }}" class="btn btn-outline-danger">
229245
Blacklisted domains
230246
</a>

0 commit comments

Comments
 (0)