2929
3030
def _slugify_project(value):
    """
    Return a filesystem-friendly project identifier.

    The value is converted to text, trimmed and lower-cased. Every run of
    whitespace becomes a single underscore, then every remaining run of
    characters outside ``a-z``, ``0-9``, ``_`` and ``-`` becomes a single
    underscore. Leading and trailing underscores are removed at the end.

    Note that adjacent underscores produced by the two passes are not
    collapsed (``"a & b"`` yields ``"a___b"``).

    Args:
        value: Project name or label to normalize.

    Returns:
        str: Normalized project identifier.
    """
    project = str(value).strip().lower()
    # Whitespace is handled first so each whitespace run maps to exactly one "_".
    project = re.sub(r"\s+", "_", project)
    project = re.sub(r"[^a-z0-9_-]+", "_", project)
    return project.strip("_")
3745
3846
def _display_project(value):
    """
    Return a readable project label for generated metadata.

    The value is converted to text and trimmed, underscores and hyphens are
    replaced by spaces, and the result is title-cased with ``str.title``.

    Args:
        value: Project name or identifier to display.

    Returns:
        str: Title-cased project label.
    """
    label = str(value).strip()
    label = label.replace("_", " ").replace("-", " ")
    return label.title()
4258
4359
@@ -202,7 +218,8 @@ def __init__(
202218 self ._resolve_version_history_template ()
203219
204220 def _resolve_version_history_template (self ):
205- """Resolve the previous Excel template used to read VERSION history.
221+ """
222+ Resolve the previous Excel template used to read VERSION history.
206223
207224 Initial versions skip previous template lookup; regular versions use either
208225 the explicit template path or the installed project template in assets.
@@ -286,7 +303,7 @@ def _load_laboratory_addresses(self):
286303 uniques [f ].add (name )
287304
288305 dropdowns = {
289- k : sorted (self ._unique_enum_values (v )) for k , v in dropdowns .items ()
306+ k : sorted (BuildSchema ._unique_enum_values (v )) for k , v in dropdowns .items ()
290307 }
291308 uniques = {k : sorted (v ) for k , v in uniques .items ()}
292309 return dropdowns , uniques
@@ -482,35 +499,77 @@ def _validate_examples_in_enum(
482499 enum_value : any ,
483500 expected_type : str | None ,
484501 ) -> list [str ]:
485- """Return validation errors for examples that are not present in enum."""
486- if self ._is_empty_validation_value (enum_value ):
502+ """
503+ Return validation errors for examples that are not present in enum.
504+
505+ Args:
506+ property_id (str): Property name used for warning messages.
507+ example_value: Raw examples value from the database definition.
508+ enum_value: Raw enum definition or reference.
509+ expected_type (str | None): Declared JSON Schema type for the property.
510+
511+ Returns:
512+ list[str]: Validation error messages.
513+ """
514+ if BuildSchema ._is_empty_validation_value (enum_value ):
487515 return []
488- if self ._is_empty_validation_value (example_value ):
516+ if BuildSchema ._is_empty_validation_value (example_value ):
489517 return []
490518
491519 enum_values = self ._parse_enum_values (enum_value )
492520 if not isinstance (enum_values , list ) or not enum_values :
493521 return []
494522
495- examples = self ._parse_examples_for_validation (example_value )
523+ examples = BuildSchema ._parse_examples_for_validation (example_value )
496524 examples = self ._cast_examples_to_declared_type (
497525 property_id , expected_type , examples
498526 )
499527
500528 enum_lookup = {
501- self ._normalize_enum_example_value (value ) for value in enum_values
529+ BuildSchema ._normalize_enum_example_value (value ) for value in enum_values
502530 }
503531 return [
504532 f"Example '{ example } ' is not defined in enum."
505533 for example in examples
506- if self ._normalize_enum_example_value (example ) not in enum_lookup
534+ if BuildSchema ._normalize_enum_example_value (example ) not in enum_lookup
507535 ]
508536
509537 @staticmethod
510538 def _normalize_enum_example_value (value : any ) -> any :
511539 if not isinstance (value , str ):
512540 return value
513- return re .sub (r"\s*\[[^\]]+\]$" , "" , value ).strip ()
541+ return BuildSchema ._clean_enum_ontology_annotation (value )
542+
543+ @staticmethod
544+ def _clean_enum_ontology_annotation (value : any ) -> any :
545+ """
546+ Remove ontology annotations displayed between brackets from enum labels.
547+
548+ Args:
549+ value: Enum value to clean.
550+
551+ Returns:
552+ Cleaned enum value when it is a string; otherwise the original value.
553+ """
554+ if not isinstance (value , str ):
555+ return value
556+ return re .sub (r"\s*\[[^\]]+\]" , "" , value ).strip ()
557+
558+ def _clean_template_enum_values (self , values : any ) -> any :
559+ """
560+ Return enum values as displayed in the Excel template dropdowns.
561+
562+ Args:
563+ values: Enum values to clean.
564+
565+ Returns:
566+ Cleaned enum list, or the original value when it is not a list.
567+ """
568+ if not isinstance (values , list ):
569+ return values
570+ return BuildSchema ._unique_enum_values (
571+ [BuildSchema ._clean_enum_ontology_annotation (value ) for value in values ]
572+ )
514573
515574 @staticmethod
516575 def _is_empty_validation_value (value : any ) -> bool :
@@ -520,7 +579,15 @@ def _is_empty_validation_value(value: any) -> bool:
520579
521580 @staticmethod
522581 def _parse_examples_for_validation (example_value : any ) -> list [any ]:
523- """Parse the examples cell using the same separator used for schema examples."""
582+ """
583+ Parse the examples cell using the same separator used for schema examples.
584+
585+ Args:
586+ example_value: Raw examples value from the database definition.
587+
588+ Returns:
589+ list: Parsed examples.
590+ """
524591 if isinstance (example_value , str ):
525592 return [
526593 value .strip () for value in example_value .split ("; " ) if value .strip ()
@@ -948,7 +1015,15 @@ def verify_schema(self, schema):
9481015
9491016 @staticmethod
9501017 def _find_duplicate_values (values : list ) -> list :
951- """Return duplicated values preserving first duplicate encounter order."""
1018+ """
1019+ Return duplicated values preserving first duplicate encounter order.
1020+
1021+ Args:
1022+ values (list): Values to inspect for duplicates.
1023+
1024+ Returns:
1025+ list: Duplicate values.
1026+ """
9521027 seen = set ()
9531028 duplicates = []
9541029 duplicate_seen = set ()
@@ -968,14 +1043,22 @@ def _find_duplicate_values(values: list) -> list:
9681043 return duplicates
9691044
9701045 def validate_schema_enum_duplicates (self , schema : dict ):
971- """Validate that every enum list in a generated schema has unique values."""
1046+ """
1047+ Validate that every enum list in a generated schema has unique values.
1048+
1049+ Args:
1050+ schema (dict): JSON Schema to inspect.
1051+
1052+ Returns:
1053+ None
1054+ """
9721055 duplicate_enums = {}
9731056
9741057 def walk_schema (node , path = "$" ):
9751058 if isinstance (node , dict ):
9761059 enum_values = node .get ("enum" )
9771060 if isinstance (enum_values , list ):
978- duplicates = self ._find_duplicate_values (enum_values )
1061+ duplicates = BuildSchema ._find_duplicate_values (enum_values )
9791062 if duplicates :
9801063 duplicate_enums [path ] = duplicates
9811064 for key , value in node .items ():
@@ -1122,9 +1205,10 @@ def _sort_enum_values(self, enum_values: list[str]) -> list[str]:
11221205 "missing" : 5 ,
11231206 "restricted access" : 6 ,
11241207 "other" : 7 ,
1208+ "none" : 8 ,
11251209 }
11261210
1127- unique_values = self ._unique_enum_values (enum_values )
1211+ unique_values = BuildSchema ._unique_enum_values (enum_values )
11281212
11291213 def sort_key (value : str ):
11301214 normalized_value = value .strip ().casefold ()
@@ -1137,7 +1221,15 @@ def sort_key(value: str):
11371221
11381222 @staticmethod
11391223 def _unique_enum_values (enum_values : list ) -> list :
1140- """Return enum values without duplicates, preserving first occurrence order."""
1224+ """
1225+ Return enum values without duplicates, preserving first occurrence order.
1226+
1227+ Args:
1228+ enum_values (list): Enum values to deduplicate.
1229+
1230+ Returns:
1231+ list: Unique enum values.
1232+ """
11411233 unique_values = []
11421234 seen = set ()
11431235 for value in enum_values :
@@ -1354,6 +1446,29 @@ def _template_only_properties_to_df(self, database_definition: dict | None):
13541446
13551447 return pd .DataFrame (template_rows )
13561448
1449+ @staticmethod
1450+ def _format_template_required_value (value ):
1451+ """
1452+ Return the visible required label used in the metadata template.
1453+
1454+ Args:
1455+ value: Raw required value from the database definition.
1456+
1457+ Returns:
1458+ str: Normalized required label.
1459+ """
1460+ required_value = str (value or "" ).strip ()
1461+ if required_value .upper () == "Y" :
1462+ return "YES"
1463+ if required_value .upper () in ["N" , "NO" ]:
1464+ return "NO"
1465+ if required_value .lower ().startswith ("y if " ):
1466+ condition = required_value [5 :].strip ()
1467+ if condition .lower () == "sequenced" :
1468+ condition = "sequenced"
1469+ return f"YES if { condition } "
1470+ return required_value
1471+
13571472 def create_metadatalab_excel (self , json_schema , database_definition = None ):
13581473 """
13591474 Generates an Excel template file for Metadata LAB with four sheets:
@@ -1462,11 +1577,23 @@ def create_metadatalab_excel(self, json_schema, database_definition=None):
14621577 df ["required" ] = df ["property_id" ].apply (
14631578 lambda x : "Y" if x in required_properties else "N"
14641579 )
1465-
1466- def clean_ontologies (enums ):
1467- return self ._unique_enum_values (
1468- [re .sub (r"\s*\[.*?\]" , "" , item ).strip () for item in enums ]
1580+ if database_definition :
1581+ required_values = {
1582+ property_id : BuildSchema ._format_template_required_value (
1583+ features .get ("required (Y/N)" )
1584+ )
1585+ for property_id , features in database_definition .items ()
1586+ }
1587+ df ["required" ] = df .apply (
1588+ lambda row : required_values .get (
1589+ row ["property_id" ], row ["required" ]
1590+ )
1591+ or row ["required" ],
1592+ axis = 1 ,
14691593 )
1594+ df ["required" ] = df ["required" ].apply (
1595+ BuildSchema ._format_template_required_value
1596+ )
14701597
14711598 def resolve_enum_ref (ref : str , enum_defs : dict ) -> list [str ]:
14721599 property_key = ref .split ("enums/" )[- 1 ]
@@ -1484,9 +1611,7 @@ def resolve_enum_ref(ref: str, enum_defs: dict) -> list[str]:
14841611 f"[red]Error finding enum for property '{ '.' .join (property_id )} '; not found in $defs"
14851612 )
14861613 return []
1487- return (
1488- clean_ontologies (values ) if isinstance (values , list ) else values
1489- )
1614+ return self ._clean_template_enum_values (values )
14901615
14911616 resolved_enums = df ["$ref" ].apply (
14921617 lambda row : (
@@ -1501,6 +1626,8 @@ def resolve_enum_ref(ref: str, enum_defs: dict) -> list[str]:
15011626 )
15021627 else :
15031628 df ["enum" ] = resolved_enums
1629+
1630+ df ["enum" ] = df ["enum" ].apply (self ._clean_template_enum_values )
15041631 common_dropdown = self ._lab_dropdowns ["collecting_institution" ]
15051632
15061633 lab_fields = [
@@ -1562,7 +1689,11 @@ def resolve_enum_ref(ref: str, enum_defs: dict) -> list[str]:
15621689 metadatalab_header = ["CAMPO" , "DESCRIPCIÓN" , "EJEMPLOS" , "REQUERIDO" ]
15631690 df_metadata = pd .DataFrame (columns = metadatalab_header )
15641691 df_metadata ["REQUERIDO" ] = df_filtered ["required" ].apply (
1565- lambda x : "YES" if str (x ).upper () in ["Y" , "YES" ] else ""
1692+ lambda x : (
1693+ "YES"
1694+ if str (x ).upper () in ["Y" , "YES" ]
1695+ else "" if str (x ).upper () in ["N" , "NO" ] else x
1696+ )
15661697 )
15671698 df_metadata ["EJEMPLOS" ] = df_filtered ["examples" ].apply (
15681699 lambda x : x [0 ] if isinstance (x , list ) else x
0 commit comments