Skip to content

Commit dd6ddb2

Browse files
author
Gerit Wagner
committed
record-similarity: annotation + catch bib_dedupe_exception.MissingRequiredFieldsError
1 parent 4cef223 commit dd6ddb2

1 file changed

Lines changed: 11 additions & 2 deletions

File tree

colrev/record/record_similarity.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import re
66
import typing
77

8+
import bib_dedupe.exception as bib_dedupe_exception
89
import pandas as pd
910
from bib_dedupe.bib_dedupe import block
1011
from bib_dedupe.bib_dedupe import match
@@ -26,7 +27,7 @@ def _norm(value: str) -> str:
2627
return "" if v.upper() in {"UNKNOWN", "NA", "N/A"} else v
2728

2829

29-
def _record_str(record) -> str:
30+
def _record_str(record: colrev.record.record.Record) -> str:
3031
d = record.data
3132
return (
3233
f"{_norm(d.get(Fields.AUTHOR, ''))} ({_norm(d.get(Fields.YEAR, ''))}) "
@@ -261,11 +262,19 @@ def matches(
261262
"""Determine whether two records match (correspond to the same entity)."""
262263
record_a_dict = record_a.copy().get_data()
263264
record_b_dict = record_b.copy().get_data()
265+
266+
# add REQUIRED_FIELDS = [ID, ENTRYTYPE, TITLE, AUTHOR, YEAR]
267+
264268
record_a_dict[Fields.ID] = "a"
265269
record_b_dict[Fields.ID] = "b"
266270

267271
records_df = pd.DataFrame([record_a_dict, record_b_dict])
268-
records_df = prep(records_df, verbosity_level=0, cpu=1)
272+
273+
try:
274+
records_df = prep(records_df, verbosity_level=0, cpu=1)
275+
except bib_dedupe_exception.MissingRequiredFieldsError:
276+
return False
277+
269278
blocked_df = block(records_df, verbosity_level=0, cpu=1)
270279
matched_df = match(blocked_df, verbosity_level=0, cpu=1)
271280
duplicate_label = matched_df["duplicate_label"]

0 commit comments

Comments
 (0)