Skip to content

Commit b1b40f5

Browse files
committed
Change date_imported to date_collected for comparison
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 35e1042 commit b1b40f5

File tree

2 files changed, with 17 additions and 14 deletions.

vulnerabilities/pipelines/recompute_content_ids.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ def process_advisories(
9999
log(f"Running function in single process", level=logging.INFO)
100100
for advisory_ids in progress.iter(advisory_batches):
101101
progress.log_progress()
102-
advisory_func(advisory_ids=advisory_ids, logger=log)
102+
advisory_func(advisory_ids=advisory_ids, log=log)
103103
return
104104

105105
log(
@@ -147,23 +147,23 @@ def get_advisory_batches(advisories, batch_size=1000, log=None):
147147
yield advisory_ids
148148

149149

150-
def recompute_content_ids(advisory_ids, logger):
150+
def recompute_content_ids(advisory_ids, log):
151151
"""
152152
Recompute content IDs for all `advisory_ids`.
153153
"""
154154
advisories = Advisory.objects.exclude(unique_content_id__length=64).filter(id__in=advisory_ids)
155155
total_count = advisories.count()
156156

157157
if not total_count:
158-
logger("No advisories need content ID recomputation", level=logging.INFO)
158+
log("No advisories need content ID recomputation", level=logging.INFO)
159159
return
160160

161-
logger(f"Recomputing content IDs for {total_count} advisories", level=logging.INFO)
161+
log(f"Recomputing content IDs for {total_count} advisories", level=logging.INFO)
162162

163163
progress = LoopProgress(
164164
total_iterations=total_count,
165165
progress_step=total_count // 100,
166-
logger=logger,
166+
logger=log,
167167
)
168168

169169
with transaction.atomic():
@@ -181,8 +181,8 @@ def recompute_content_ids(advisory_ids, logger):
181181
["unique_content_id"],
182182
batch_size=len(advisories_to_update),
183183
)
184-
if logger:
185-
logger(
184+
if log:
185+
log(
186186
f"Updated content IDs for {len(advisories_to_update)} advisories",
187187
level=logging.INFO,
188188
)

vulnerabilities/pipelines/remove_duplicate_advisories.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#
99

1010
import logging
11+
import traceback
1112
from itertools import groupby
1213

1314
from aboutcode.pipeline import LoopProgress
@@ -19,7 +20,7 @@
1920
from vulnerabilities.pipelines.recompute_content_ids import process_advisories
2021

2122

22-
def remove_duplicates_batch(advisory_ids, logger=None):
23+
def remove_duplicates_batch(advisory_ids, log=None):
2324
"""
2425
Process a batch of advisories to remove duplicates.
2526
Keep only the oldest advisory for each content ID.
@@ -37,31 +38,33 @@ def remove_duplicates_batch(advisory_ids, logger=None):
3738
key=lambda x: x.unique_content_id,
3839
)
3940

40-
progress = LoopProgress(total_iterations=advisories.count(), logger=logger)
41+
progress = LoopProgress(total_iterations=advisories.count(), logger=log)
4142

4243
for content_id, group_advisories in progress.iter(advisories_by_content_id):
4344
group_advisories = list(group_advisories)
4445

4546
if len(group_advisories) <= 1:
4647
continue
4748

48-
logger(
49+
log(
4950
f"Found {len(group_advisories)} duplicates for content ID {content_id}",
51+
level=logging.INFO,
5052
)
5153

52-
oldest = min(group_advisories, key=lambda x: x.date_imported)
54+
oldest = min(group_advisories, key=lambda x: x.date_collected)
5355

5456
advisory_ids_to_delete = [adv.id for adv in group_advisories if adv.id != oldest.id]
5557
if advisory_ids_to_delete:
5658
Advisory.objects.filter(id__in=advisory_ids_to_delete).delete()
57-
logger(
59+
log(
5860
f"Kept advisory {oldest.id} and removed "
5961
f"{len(advisory_ids_to_delete)} duplicates for content ID {content_id}",
62+
level=logging.INFO,
6063
)
6164

6265
except Exception as e:
63-
logger(
64-
f"Error processing batch of advisories: {e}",
66+
log(
67+
f"Error removing duplicates for batch of advisories: {traceback.format_exc()}",
6568
level=logging.ERROR,
6669
)
6770

0 commit comments

Comments
 (0)