@@ -37,15 +37,36 @@ def scan_dataset(dataset_path: str) -> Dict:
3737 labels = find_files (root , [".txt" ])
3838 label_map = {f .stem : f for f in labels }
3939
40+ # Build a function to check if label file has valid YOLO lines
def has_valid_label(file: Path) -> bool:
    """Return True if *file* contains at least one well-formed YOLO label line.

    A line counts as well-formed when it has exactly five
    whitespace-separated fields (the YOLO layout of a class id followed by
    four box values) and every field parses as a number.

    Args:
        file: Path of the label ``.txt`` file to inspect.

    Returns:
        True as soon as one well-formed line is found; False when there is
        none, or when the file cannot be read or decoded.
    """
    try:
        text = file.read_text()
    except (OSError, UnicodeDecodeError):
        # Unreadable or undecodable files are treated as carrying no labels.
        # Catching only these keeps KeyboardInterrupt/SystemExit propagating.
        return False

    # splitlines() handles "\n", "\r\n" and trailing blank lines uniformly.
    for line in text.splitlines():
        parts = line.split()
        if len(parts) != 5:
            continue
        try:
            # Validate every field, including the leading class id, so a line
            # such as "cat 0.5 0.5 0.2 0.2" is not mistaken for a label.
            for token in parts:
                float(token)
        except ValueError:
            continue
        return True
    return False
57+
58+ # NEW: count only files with valid labels
59+ valid_label_files = [f for f in labels if has_valid_label (f )]
60+
4061 missing_labels = [img for img in images if img .stem not in label_map ]
4162 image_dirs = {img .parent for img in images }
42- label_dirs = {lbl .parent for lbl in labels }
63+ label_dirs = {lbl .parent for lbl in valid_label_files }
4364
4465 result = {
4566 "total_images" : len (images ),
46- "total_labels" : len (labels ),
67+ "total_labels" : len (valid_label_files ), # <-- FIXED
4768 "missing_pairs" : len (missing_labels ),
48- "missing_label_files" : [str (i ) for i in missing_labels [:20 ]], # preview
69+ "missing_label_files" : [str (i ) for i in missing_labels [:20 ]],
4970 "structure_type" : classify_structure (image_dirs , label_dirs ),
5071 "image_dirs" : list (map (str , image_dirs )),
5172 "label_dirs" : list (map (str , label_dirs )),
@@ -55,6 +76,7 @@ def scan_dataset(dataset_path: str) -> Dict:
5576 return result
5677
5778
79+
5880if __name__ == "__main__" :
5981 import pprint
6082 path = input ("Enter dataset directory: " )
0 commit comments