1313import json
1414import time
1515import urllib .request
16- from datetime import datetime , timezone
16+ from datetime import UTC , datetime
1717from pathlib import Path
1818
1919import click
@@ -54,7 +54,10 @@ def cli() -> None:
5454@cli .command ()
5555@click .argument ("target" )
5656@click .option ("--schema" , default = None , help = "Path to a Python file with a BaseSchema subclass." )
57- @click .option ("--config" , "config_path" , default = None , help = "Path to scraperguard.yaml config file." )
57+ @click .option (
58+ "--config" , "config_path" , default = None ,
59+ help = "Path to scraperguard.yaml config file." ,
60+ )
5861@click .option ("--run-id" , default = None , help = "Run ID to group with (creates new if not provided)." )
5962@click .option ("--selectors" , default = None , help = "Comma-separated CSS selectors to track." )
6063@click .option ("--store-raw-html" , is_flag = True , default = False , help = "Store raw HTML in snapshot." )
@@ -121,7 +124,7 @@ def run(
121124 metadata = SnapshotMetadata (
122125 http_status = http_status ,
123126 latency_ms = latency_ms ,
124- timestamp = datetime .now (timezone . utc ),
127+ timestamp = datetime .now (UTC ),
125128 headers = headers ,
126129 response_size_bytes = len (html .encode ("utf-8" )),
127130 )
@@ -156,7 +159,8 @@ def run(
156159 click .echo (f"Warning: Drift analysis failed: { exc } " , err = True )
157160 storage .save_validation_result (validation_result )
158161 click .echo (
159- f"Schema validation: { validation_result .passed_count } /{ validation_result .total_items } passed"
162+ f"Schema validation: "
163+ f"{ validation_result .passed_count } /{ validation_result .total_items } passed"
160164 )
161165 except SchemaLoadError as exc :
162166 click .echo (f"Warning: Schema load failed: { exc } " , err = True )
@@ -168,7 +172,6 @@ def run(
168172 if selector_list :
169173 try :
170174 current_tree = parse_to_tree (snapshot .normalized_html )
171- prev_snapshot = storage .get_latest_snapshot (url )
172175 # get_latest_snapshot might return the one we just saved; get the one before
173176 snapshots = storage .list_snapshots (url , limit = 2 )
174177 prev_tree = None
@@ -193,7 +196,9 @@ def run(
193196 if s .id != snapshot .id :
194197 prev_snapshot_obj = s
195198 break
196- if prev_snapshot_obj and should_diff (snapshot .fingerprint , prev_snapshot_obj .fingerprint ):
199+ if prev_snapshot_obj and should_diff (
200+ snapshot .fingerprint , prev_snapshot_obj .fingerprint ,
201+ ):
197202 before_tree = parse_to_tree (prev_snapshot_obj .normalized_html )
198203 after_tree = parse_to_tree (snapshot .normalized_html )
199204 dom_changes = diff_trees (before_tree , after_tree )
@@ -208,7 +213,9 @@ def run(
208213 if s .id != snapshot .id :
209214 prev_snapshot_obj = s
210215 break
211- if prev_snapshot_obj and should_diff (snapshot .fingerprint , prev_snapshot_obj .fingerprint ):
216+ if prev_snapshot_obj and should_diff (
217+ snapshot .fingerprint , prev_snapshot_obj .fingerprint ,
218+ ):
212219 before_tree = parse_to_tree (prev_snapshot_obj .normalized_html )
213220 after_tree = parse_to_tree (snapshot .normalized_html )
214221 dom_changes = diff_trees (before_tree , after_tree )
0 commit comments