@@ -524,6 +524,10 @@ def check_dataset(
524524 target_splits = target_splits & dataset_splits
525525
526526 checksum_report = {}
527+ # Track shape of each Global feature across all checked splits/samples to
528+ # detect inconsistencies (e.g. a Global stored as a scalar in one sample
529+ # and as a vector in another).
530+ global_shape_observations : dict [str , dict [tuple , list [str ]]] = {}
527531 for split in sorted (target_splits ):
528532 dataset = datasetdict [split ]
529533 converter = converterdict [split ]
@@ -595,6 +599,17 @@ def check_dataset(
595599 issue ,
596600 )
597601
602+ # Record the observed shape of this Global so we can later
603+ # detect dimension mismatches across all checked samples
604+ # (across splits). At this point ``_check_numeric_content``
605+ # already coerced ``value`` through ``np.asarray`` without
606+ # error, so the same call here is safe.
607+ if value is not None :
608+ shape = tuple (np .asarray (value ).shape )
609+ global_shape_observations .setdefault (global_name , {}).setdefault (
610+ shape , []
611+ ).append (f"{ split } [{ idx } ]" )
612+
598613 for time in sample .get_all_time_values ():
599614 local_bases = sample .get_base_names (time = time )
600615 for base in local_bases :
@@ -625,6 +640,25 @@ def check_dataset(
625640 issue ,
626641 )
627642
643+ # Report Globals whose dimension/shape is not consistent across all
644+ # checked samples (across splits).
645+ for global_name , shape_to_locations in global_shape_observations .items ():
646+ if len (shape_to_locations ) <= 1 :
647+ continue
648+ details = "; " .join (
649+ f"shape={ shape } at { locations [:5 ]} "
650+ + (f" (+{ len (locations ) - 5 } more)" if len (locations ) > 5 else "" )
651+ for shape , locations in sorted (
652+ shape_to_locations .items (), key = lambda kv : str (kv [0 ])
653+ )
654+ )
655+ report .add (
656+ "error" ,
657+ "GLOBAL_SHAPE_MISMATCH" ,
658+ f"global/{ global_name } " ,
659+ f"Global '{ global_name } ' has inconsistent shapes across samples: { details } " ,
660+ )
661+
628662 # Compare checksums from every checked sample to flag identical sample data.
629663 checksum_values = list (checksum_report .values ())
630664 if len (checksum_report ) != len (np .unique (checksum_values )):
0 commit comments