Skip to content

Commit 7a3ec74

Browse files
committed
Refine markdown link encoding and enhance label formatting to prefer symbols over labels for templates and anatomy
1 parent 92d798a commit 7a3ec74

1 file changed

Lines changed: 18 additions & 23 deletions

File tree

src/vfbquery/vfb_queries.py

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,8 @@ def encode_brackets(text):
323323

324324
def encode_markdown_links(df, columns):
325325
"""
326-
Encodes brackets in the labels and titles within markdown links and images, leaving the link syntax intact.
326+
Encodes brackets in the labels within markdown links, leaving the link syntax intact.
327+
Does NOT encode alt text in linked images ([![...](...)](...) format).
327328
:param df: DataFrame containing the query results.
328329
:param columns: List of column names to apply encoding to.
329330
"""
@@ -332,28 +333,10 @@ def encode_label(label):
332333
return label
333334

334335
try:
335-
# Process linked images (format: [![alt text](image_url "title")](link))
336+
# Skip linked images (format: [![alt text](image_url "title")](link))
337+
# These should NOT be encoded
336338
if label.startswith("[!["):
337-
# Split into image part and link part
338-
parts = label.split(")](")
339-
if len(parts) < 2:
340-
return label
341-
342-
image_part = parts[0]
343-
link_part = parts[1]
344-
345-
# Process the image part
346-
image_parts = image_part.split("](")
347-
if len(image_parts) < 2:
348-
return label
349-
350-
alt_text = image_parts[0][3:] # Remove the "[![" prefix
351-
# Encode brackets in alt text
352-
alt_text_encoded = encode_brackets(alt_text)
353-
354-
# Reconstruct the linked image with encoded alt text
355-
encoded_label = f"[![{alt_text_encoded}]({image_parts[1]})]({link_part}"
356-
return encoded_label
339+
return label
357340

358341
# Process regular markdown links
359342
elif label.startswith("[") and "](" in label:
@@ -1067,9 +1050,16 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
10671050
# Format thumbnail with proper markdown link (matching Neo4j format)
10681051
thumbnail = ''
10691052
if thumbnail_url and template_anatomy:
1053+
# Prefer symbol over label for template (matching Neo4j behavior)
10701054
template_label = template_anatomy.get('label', '')
1055+
if template_anatomy.get('symbol') and len(template_anatomy.get('symbol', '')) > 0:
1056+
template_label = template_anatomy.get('symbol')
10711057
template_short_form = template_anatomy.get('short_form', '')
1058+
1059+
# Prefer symbol over label for anatomy (matching Neo4j behavior)
10721060
anatomy_label = anatomy.get('label', '')
1061+
if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
1062+
anatomy_label = anatomy.get('symbol')
10731063
anatomy_short_form = anatomy.get('short_form', '')
10741064

10751065
if template_label and anatomy_label:
@@ -1082,13 +1072,18 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
10821072
# Format template information
10831073
template_formatted = ''
10841074
if template_anatomy:
1075+
# Prefer symbol over label (matching Neo4j behavior)
10851076
template_label = template_anatomy.get('label', '')
1077+
if template_anatomy.get('symbol') and len(template_anatomy.get('symbol', '')) > 0:
1078+
template_label = template_anatomy.get('symbol')
10861079
template_short_form = template_anatomy.get('short_form', '')
10871080
if template_label and template_short_form:
10881081
template_formatted = f"[{template_label}]({template_short_form})"
10891082

1090-
# Handle label formatting (match Neo4j format)
1083+
# Handle label formatting (match Neo4j format - prefer symbol over label)
10911084
anatomy_label = anatomy.get('label', 'Unknown')
1085+
if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
1086+
anatomy_label = anatomy.get('symbol')
10921087
anatomy_short_form = anatomy.get('short_form', '')
10931088

10941089
row = {

0 commit comments

Comments
 (0)