@@ -1058,8 +1058,11 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
10581058 # Use the ordered tags to match expected format
10591059 tags = '|' .join (ordered_tags )
10601060
1061- # Extract thumbnail URL
1061+ # Extract thumbnail URL and convert to HTTPS
10621062 thumbnail_url = image_info .get ('image_thumbnail' , '' ) if image_info else ''
1063+ if thumbnail_url :
1064+ # Replace http with https and thumbnailT.png with thumbnail.png
1065+ thumbnail_url = thumbnail_url .replace ('http://' , 'https://' ).replace ('thumbnailT.png' , 'thumbnail.png' )
10631066
10641067 # Format thumbnail with proper markdown link (matching Neo4j format)
10651068 thumbnail = ''
@@ -1071,6 +1074,7 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
10711074
10721075 if template_label and anatomy_label :
10731076 # Create thumbnail markdown link matching the original format
1077+ # DO NOT encode brackets in alt text - that's done later by encode_markdown_links
10741078 alt_text = f"{ anatomy_label } aligned to { template_label } "
10751079 link_target = f"{ template_short_form } ,{ anatomy_short_form } "
10761080 thumbnail = f"[]({ link_target } )"
@@ -1083,22 +1087,13 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
10831087 if template_label and template_short_form :
10841088 template_formatted = f"[{ template_label } ]({ template_short_form } )"
10851089
1086- # Handle URL encoding for labels (match Neo4j format)
1090+ # Handle label formatting (match Neo4j format)
10871091 anatomy_label = anatomy .get ('label' , 'Unknown' )
10881092 anatomy_short_form = anatomy .get ('short_form' , '' )
10891093
1090- # URL encode special characters in label for markdown links (matching Neo4j behavior)
1091- # Only certain labels need encoding (like those with parentheses)
1092- import urllib .parse
1093- if '(' in anatomy_label or ')' in anatomy_label :
1094- # URL encode but keep spaces and common characters
1095- encoded_label = urllib .parse .quote (anatomy_label , safe = ' -_.' )
1096- else :
1097- encoded_label = anatomy_label
1098-
10991094 row = {
11001095 'id' : anatomy_short_form ,
1101- 'label' : f"[{ encoded_label } ]({ anatomy_short_form } )" ,
1096+ 'label' : f"[{ anatomy_label } ]({ anatomy_short_form } )" ,
11021097 'tags' : tags ,
11031098 'parent' : f"[{ term_info .get ('term' , {}).get ('core' , {}).get ('label' , 'Unknown' )} ]({ short_form } )" ,
11041099 'source' : '' , # Not readily available in SOLR anatomy_channel_image
@@ -1116,7 +1111,11 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
11161111 total_count = len (anatomy_images )
11171112
11181113 if return_dataframe :
1119- return pd .DataFrame (rows )
1114+ df = pd .DataFrame (rows )
1115+ # Apply encoding to markdown links (matches Neo4j implementation)
1116+ columns_to_encode = ['label' , 'parent' , 'source' , 'source_id' , 'template' , 'dataset' , 'license' , 'thumbnail' ]
1117+ df = encode_markdown_links (df , columns_to_encode )
1118+ return df
11201119
11211120 return {
11221121 "headers" : _get_instances_headers (),
0 commit comments