Skip to content

Commit 25cf3e7

Browse files
handle checks.
1 parent 5cf4e85 commit 25cf3e7

4 files changed

Lines changed: 34 additions & 7 deletions

File tree

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,4 +10,5 @@ logs
1010
logs_extreme
1111
htmlcov
1212
coverage.xml
13-
Makefile
13+
Makefile
14+
.coverage

src/yolo_augmentor/aug/augment_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ def _load_config(self, config_path: str) -> Dict:
106106
def _setup_logging(self):
107107
"""Setup comprehensive logging for debugging."""
108108
log_dir = Path(self.config['dataset'].get('logs_dir', 'logs'))
109-
log_dir.mkdir(exist_ok=True)
109+
log_dir.mkdir(parents=True, exist_ok=True)
110110
log_file = log_dir / f"augmentation_{datetime.now():%Y%m%d_%H%M%S}.log"
111111

112112
logging.basicConfig(

src/yolo_augmentor/data/repair_labels.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,12 @@ def _clean_line(line: str):
3636
x, y = max(0, min(1, x)), max(0, min(1, y))
3737
w, h = max(0, min(1, w)), max(0, min(1, h))
3838

39-
# invalid bounding boxes
40-
if w <= 0 or h <= 0:
39+
# Coordinates must be strictly inside (0,1)
40+
if not (0 < x < 1 and 0 < y < 1):
41+
return None
42+
43+
# width/height must be positive AND ensure box doesn't exceed bounds
44+
if not (0 < w < 1 and 0 < h < 1):
4145
return None
4246

4347
return cls, x, y, w, h

src/yolo_augmentor/data/scan_dataset.py

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -37,15 +37,36 @@ def scan_dataset(dataset_path: str) -> Dict:
3737
labels = find_files(root, [".txt"])
3838
label_map = {f.stem: f for f in labels}
3939

40+
# Build a function to check if label file has valid YOLO lines
41+
def has_valid_label(file: Path) -> bool:
42+
try:
43+
lines = file.read_text().strip().split("\n")
44+
except:
45+
return False
46+
47+
for line in lines:
48+
parts = line.strip().split()
49+
if len(parts) == 5:
50+
try:
51+
float(parts[1]); float(parts[2])
52+
float(parts[3]); float(parts[4])
53+
return True
54+
except ValueError:
55+
continue
56+
return False
57+
58+
# NEW: count only files with valid labels
59+
valid_label_files = [f for f in labels if has_valid_label(f)]
60+
4061
missing_labels = [img for img in images if img.stem not in label_map]
4162
image_dirs = {img.parent for img in images}
42-
label_dirs = {lbl.parent for lbl in labels}
63+
label_dirs = {lbl.parent for lbl in valid_label_files}
4364

4465
result = {
4566
"total_images": len(images),
46-
"total_labels": len(labels),
67+
"total_labels": len(valid_label_files), # <-- FIXED
4768
"missing_pairs": len(missing_labels),
48-
"missing_label_files": [str(i) for i in missing_labels[:20]], # preview
69+
"missing_label_files": [str(i) for i in missing_labels[:20]],
4970
"structure_type": classify_structure(image_dirs, label_dirs),
5071
"image_dirs": list(map(str, image_dirs)),
5172
"label_dirs": list(map(str, label_dirs)),
@@ -55,6 +76,7 @@ def scan_dataset(dataset_path: str) -> Dict:
5576
return result
5677

5778

79+
5880
if __name__ == "__main__":
5981
import pprint
6082
path = input("Enter dataset directory: ")

0 commit comments

Comments
 (0)