Skip to content

Commit e106501

Browse files
committed
Handle missing conflation lookup results
1 parent 9f6e9c0 commit e106501

1 file changed

Lines changed: 56 additions & 10 deletions

File tree

src/nodenorm/handlers/normalized_nodes.py

Lines changed: 56 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -365,11 +365,40 @@ async def _lookup_curie_metadata(
365365

366366
replacement_identifiers = []
367367
replacement_types = []
368+
skipped_conflation_curies = []
368369
conflation_label_discovered = False
369370
for conflation_curie in conflation_identifiers:
370-
conflation_result = conflation_result_lookup.get(conflation_curie, {})
371+
if conflation_curie in malformed_conflation_curies:
372+
skipped_conflation_curies.append(conflation_curie)
373+
logger.warning(
374+
"Unable to resolve conflation CURIE %s while normalizing %s; skipping it.",
375+
conflation_curie,
376+
input_curie,
377+
)
378+
continue
379+
380+
conflation_result = conflation_result_lookup.get(conflation_curie)
381+
if not conflation_result:
382+
skipped_conflation_curies.append(conflation_curie)
383+
logger.warning(
384+
"No lookup result found for conflation CURIE %s while normalizing %s; skipping it.",
385+
conflation_curie,
386+
input_curie,
387+
)
388+
continue
389+
371390
conflation_biolink_type = conflation_result.get("_source", {}).get("type", [])
372391
conflation_identifier_lookup = conflation_result.get("_source", {}).get("identifiers", [])
392+
if not conflation_identifier_lookup:
393+
skipped_conflation_curies.append(conflation_curie)
394+
logger.warning(
395+
"Conflation CURIE %s resolved to document %s with no identifiers while normalizing %s; "
396+
"skipping it.",
397+
conflation_curie,
398+
conflation_result.get("_id"),
399+
input_curie,
400+
)
401+
continue
373402

374403
for conflation_entry in conflation_identifier_lookup:
375404
conflation_entry.update({"t": [conflation_biolink_type]})
@@ -388,15 +417,32 @@ async def _lookup_curie_metadata(
388417

389418
replacement_types = unique_list(replacement_types)
390419

391-
node = NormalizedNode(
392-
curie=input_curie,
393-
canonical_identifier=canonical_identifier,
394-
preferred_label=preferred_label,
395-
information_content=information_content,
396-
identifiers=replacement_identifiers,
397-
types=replacement_types,
398-
taxa=taxa,
399-
)
420+
if not replacement_identifiers:
421+
logger.error(
422+
"Unable to resolve any conflation CURIEs for %s; falling back to base normalized node. "
423+
"Skipped conflation CURIEs: %s",
424+
input_curie,
425+
skipped_conflation_curies or conflation_identifiers,
426+
)
427+
node = NormalizedNode(
428+
curie=input_curie,
429+
canonical_identifier=canonical_identifier,
430+
preferred_label=preferred_label,
431+
information_content=information_content,
432+
identifiers=identifiers,
433+
types=node_types,
434+
taxa=taxa,
435+
)
436+
else:
437+
node = NormalizedNode(
438+
curie=input_curie,
439+
canonical_identifier=canonical_identifier,
440+
preferred_label=preferred_label,
441+
information_content=information_content,
442+
identifiers=replacement_identifiers,
443+
types=replacement_types,
444+
taxa=taxa,
445+
)
400446
nodes.append(node)
401447
else:
402448
node = NormalizedNode(

0 commit comments

Comments
 (0)