Skip to content

Commit 02e6224

Browse files
vortexuserkluge7
andauthored
refactor: update model parameters and configurations across YOLO obje… (#59)
* refactor: update model parameters and configurations across YOLO object detection and segmentation pipelines * Update maintainer email in package.xml * Update maintainer information in setup.py * fix(config): update model parameters and dataset paths for OBB training * chore: fix pre-commit --------- Co-authored-by: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com>
1 parent 877ddaa commit 02e6224

19 files changed

Lines changed: 511 additions & 33 deletions

File tree

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
/**:
22
ros__parameters:
3-
model_path: 'obb_best.pt'
4-
confidence_threshold: 0.1
5-
input_topic: '/realsense/D555_409122300281_Color'
3+
model_path: 'best.pt'
4+
confidence_threshold: 0.01
5+
input_topic: '/camera/camera/color/image_raw'
66
output_detections_topic: '/yolo_obb_object_detection/detections'
77
output_annotated_topic: '/yolo_obb_object_detection/annotated'
8-
device: 'cpu' # Inference device: 'cpu', 0, 1, 'cuda', 'cuda:0', or 'mps' (Mac GPU)
8+
device: '0' # Inference device: 'cpu', 0, 1, 'cuda', 'cuda:0', or 'mps' (Mac GPU)

ros/yolo_obb_object_detection/setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
maintainer_email='89779148+kluge7@users.noreply.github.com',
2525
description='YOLO OBB object detection on images, publishing detections and annotated outputs.',
2626
license='MIT',
27-
tests_require=['pytest'],
2827
entry_points={
2928
'console_scripts': [
3029
'yolo_obb_object_detection_node=yolo_obb_object_detection.yolo_obb_object_detection_node:main',
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
/**:
22
ros__parameters:
3-
model_path: 'best.pt'
4-
confidence_threshold: 0.1
3+
model_path: 'best_simulator.pt'
4+
confidence_threshold: 0.5
55
input_topic: '/nautilus/front_camera/image_color'
66
output_detections_topic: '/yolo_object_detection/detections'
77
output_annotated_topic: '/yolo_object_detection/annotated'
8-
device: 'cpu' # or '0' for GPU
8+
device: '0' # GPU index; use 'cpu' for CPU inference

ros/yolo_object_detection/setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
maintainer_email='89779148+kluge7@users.noreply.github.com',
2525
description='YOLO object detection on images, publishing detections and annotated outputs.',
2626
license='MIT',
27-
tests_require=['pytest'],
2827
entry_points={
2928
'console_scripts': [
3029
'yolo_object_detection_node=yolo_object_detection.yolo_object_detection_node:main',

ros/yolo_segmentation/config/yolo_segmentation_params.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
yolo_segmentation_node:
22
ros__parameters:
33
# Node parameters
4-
input_topic: "/nautilus/down_camera/image_color"
4+
input_topic: "/camera/camera/color/image_raw"
55
output_bbox_topic: "/pipeline/camera/bboxes"
66
output_mask_topic: "/pipeline/camera/segmentation_mask"
77
output_debug_topic: "/pipeline/camera/segmentation_debug"
@@ -10,11 +10,11 @@ yolo_segmentation_node:
1010
pub_debug: True
1111

1212
# Camera info parameters
13-
input_camera_info_topic: "/nautilus/down_camera/camera_info"
14-
output_camera_info_topic: "/pipeline/camera/camera_info" # Scaled to match segmentation mask resolution
13+
input_camera_info_topic: "/camera/camera/color/camera_info"
14+
output_camera_info_topic: "/pipeline/camera/camera_info" # Scaled to match segmentation mask resolution
1515

1616
# Implementation parameters
17-
model_path: "yolo26l_sim_and_real.pt"
17+
model_path: "mclab-seg-pipe-overfit-hopefully-large-model.pt"
1818
device: "cuda" # Options: "cpu", "cuda", "cuda:0", "cuda:1", "mps", etc.
1919

2020
# Image preprocessing (what ultralytics does):

ros/yolo_segmentation/setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
maintainer_email='mjengesv@ntnu.no',
2222
description='ROS 2 package that provides a YOLO-based instance segmentation node (yolo_seg_node) for real-time segmentation.',
2323
license='MIT',
24-
tests_require=['pytest'],
2524
entry_points={
2625
'console_scripts': [
2726
'yolo_seg_node = yolo_segmentation.yolo_seg_node:main',
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**:
22
ros__parameters:
3-
segmentation_image_sub_topic: "/front_camera_seg/image_color"
4-
color_image_sub_topic: "/front_camera/image_color"
5-
output_dir: /home/jorgen/ros2_ws/src/vortex-deep-learning-pipelines/stonefish_labeling/camera_segmentation/resources
3+
segmentation_image_sub_topic: "/nautilus/segmentation_cam/image_color"
4+
color_image_sub_topic: "/nautilus/front_camera/image_color"
5+
output_dir: /home/vortex/stonefish_labeling_output_new_new_new
66
sync_tolerance_ms: 100 # Timestamps of the two topics from stonefish are not perfectly synced.
77
# This sets the max allowed time offset between the two topics.

stonefish_labeling/dataset_conversion_scripts/convert_to_yolo.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,11 @@ def main():
124124
if not mask_files:
125125
mask_files = sorted(seg_dir.glob("*_ids.tiff"))
126126

127-
for mf in mask_files:
127+
total_masks = len(mask_files)
128+
print(f"[1/2] Scanning {total_masks} masks to discover class ids...")
129+
for i, mf in enumerate(mask_files, 1):
130+
if i % 50 == 0 or i == total_masks:
131+
print(f" scanned {i}/{total_masks}")
128132
ids_raw = cv2.imread(str(mf), cv2.IMREAD_UNCHANGED)
129133
if ids_raw is None:
130134
continue
@@ -156,7 +160,11 @@ def main():
156160
if not front_files:
157161
front_files = sorted(seg_dir.glob("*.png"))
158162

159-
for cpath in front_files:
163+
total_frames = len(front_files)
164+
print(f"[2/2] Converting {total_frames} frames to YOLO labels...")
165+
for i, cpath in enumerate(front_files, 1):
166+
if i % 50 == 0 or i == total_frames:
167+
print(f" converted {i}/{total_frames}")
160168
img = cv2.imread(str(cpath), cv2.IMREAD_COLOR)
161169
stem = cpath.stem
162170
mask_tiff = seg_dir / f"{stem}_mask.tiff"
Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import json
4+
import logging
5+
import os
6+
import shutil
7+
from pathlib import Path
8+
9+
import cv2
10+
import numpy as np
11+
import pandas as pd
12+
13+
14+
def load_id_to_label(seg_dir: Path) -> dict:
    """Load the mapping from segmentation id to class label.

    Sources are tried in order and the first file that exists wins:

    1. ``id_label_map.json`` -- a JSON object of ``{id: label}`` pairs.
    2. ``id_label_map.csv`` -- a table with ``id`` and ``label`` columns.
    3. ``legend.csv`` -- only the ``id`` column is read and labels are
       synthesised: ``0`` -> ``"background"``, ``65534`` -> ``"unknown"``,
       anything else -> ``"id_<n>"``.

    Args:
        seg_dir: Directory holding the exported segmentation data.

    Returns:
        A dict with ``int`` keys and ``str`` labels. Empty when none of the
        three files exist.
    """
    json_path = seg_dir / "id_label_map.json"
    csv_path = seg_dir / "id_label_map.csv"
    if json_path.exists():
        raw = json.loads(json_path.read_text(encoding="utf-8"))
        return {int(k): str(v) for k, v in raw.items()}
    if csv_path.exists():
        return {
            int(r["id"]): str(r["label"])
            for r in pd.read_csv(csv_path).to_dict("records")
        }

    m = {}
    legend = seg_dir / "legend.csv"
    if legend.exists():
        # A single blank cell makes pandas read the whole column as float, which
        # used to leak keys/labels such as 7.0 / "id_7.0" and a NaN entry.
        # Coerce to numeric, drop what cannot be parsed, and cast to int so the
        # keys have the same type as in the JSON/CSV branches above.
        ids = pd.to_numeric(pd.read_csv(legend)["id"], errors="coerce").dropna()
        for raw_id in ids:
            i = int(raw_id)
            if i == 0:
                m[i] = "background"
            elif i == 65534:
                m[i] = "unknown"
            else:
                m[i] = f"id_{i}"
    return m
37+
38+
39+
def load_legend_colors(seg_dir: Path) -> dict:
    """Load the ``id -> (r, g, b)`` colour table from ``legend.csv``.

    Args:
        seg_dir: Directory that may contain ``legend.csv`` with the columns
            ``id``, ``r``, ``g`` and ``b``.

    Returns:
        A dict mapping integer id to an ``(r, g, b)`` tuple of ints. Empty when
        the file is missing or lacks one of the required columns. Rows whose
        values cannot be converted to ``int`` are skipped.
    """
    legend_path = seg_dir / "legend.csv"
    colors = {}
    if not legend_path.exists():
        return colors

    df = pd.read_csv(legend_path)
    missing = [c for c in ("id", "r", "g", "b") if c not in df.columns]
    if missing:
        # Report a malformed legend once and visibly, instead of once per row
        # at debug level (which is hidden by default).
        logging.warning("Legend %s lacks column(s) %s", legend_path, missing)
        return colors

    for row in df.to_dict("records"):
        try:
            colors[int(row["id"])] = (
                int(row["r"]),
                int(row["g"]),
                int(row["b"]),
            )
        except (TypeError, ValueError, OverflowError) as e:
            # Only conversion problems (text, NaN, inf, None) are expected here;
            # anything else is a real bug and should propagate.
            logging.debug("Skipping legend row due to error: %s", e)
    return colors
56+
57+
58+
def convert_mask_image_to_ids(ids_img: np.ndarray, legend_colors: dict) -> np.ndarray:
    """Turn a mask image into a 2-D array of integer object ids.

    Args:
        ids_img: Mask image, or ``None``. A 2-D array (or a 3-D array with a
            single channel) is taken to hold ids directly. Otherwise the first
            three channels are treated as a colour that is looked up in
            ``legend_colors``.
        legend_colors: Mapping ``id -> (r, g, b)``. Channel 0 of the image is
            matched against ``b`` and channel 2 against ``r``, i.e. the image
            is assumed to be in BGR channel order.

    Returns:
        An ``int32`` array of shape ``(H, W)``, or ``None`` when ``ids_img`` is
        ``None``. Colours absent from the legend map to ``65534``.
    """
    if ids_img is None:
        return None
    if ids_img.ndim == 2:
        return ids_img.astype(np.int32)
    if ids_img.shape[2] == 1:
        # A lone channel carries ids, not a colour; indexing channels 1 and 2
        # below would raise IndexError.
        return ids_img[:, :, 0].astype(np.int32)

    # Pack each legend colour into one 24-bit integer so a pixel can be matched
    # with a single comparison.
    color_to_id = {}
    for _id, (r, g, b) in legend_colors.items():
        code = (b & 0xFF) | ((g & 0xFF) << 8) | ((r & 0xFF) << 16)
        color_to_id[code] = _id

    # Drop any channel past the third (e.g. alpha) and pack the pixels the
    # same way as the legend colours.
    flat = ids_img.reshape(-1, ids_img.shape[2])[:, :3]
    codes = (
        flat[:, 0].astype(np.uint32)
        | (flat[:, 1].astype(np.uint32) << 8)
        | (flat[:, 2].astype(np.uint32) << 16)
    )

    # Start from "unknown" and overwrite every pixel whose colour is in the legend.
    mapped = np.full(codes.shape, 65534, dtype=np.int32)
    for code, _id in color_to_id.items():
        mapped[codes == code] = _id
    return mapped.reshape(ids_img.shape[0], ids_img.shape[1])
77+
78+
79+
def obb_line_from_mask(
    mask: np.ndarray, img_w: int, img_h: int, cls_idx: int, axis_aligned: bool = False
):
    """Build one YOLO-OBB label line for the pixels set in ``mask``.

    The line has the form ``'cls x1 y1 x2 y2 x3 y3 x4 y4'`` where every x is
    divided by ``img_w`` and every y by ``img_h``, printed with six decimals.

    Args:
        mask: 2-D boolean (or truthy) array marking the object's pixels.
        img_w: Width used to normalise x coordinates.
        img_h: Height used to normalise y coordinates.
        cls_idx: Class index written as the first token.
        axis_aligned: When true, use the upright bounding box of the pixels;
            otherwise use ``cv2.minAreaRect``.

    Returns:
        The label line, or ``None`` when fewer than three pixels are set.
    """
    rows, cols = np.nonzero(mask)
    if rows.size < 3:
        return None

    if not axis_aligned:
        # Rotated box: corners of the minimum-area rectangle around the pixels.
        points = np.stack((cols, rows), axis=1).astype(np.float32)
        corners = cv2.boxPoints(cv2.minAreaRect(points))
    else:
        # Upright box, still expressed as four corner points.
        left, right = float(cols.min()), float(cols.max())
        top, bottom = float(rows.min()), float(rows.max())
        corners = np.array(
            [[left, top], [right, top], [right, bottom], [left, bottom]],
            dtype=np.float32,
        )

    tokens = [f"{cls_idx}"]
    for px, py in corners:
        tokens.append(f"{px / img_w:.6f}")
        tokens.append(f"{py / img_h:.6f}")
    return " ".join(tokens)
102+
103+
104+
def _parse_id_set(text: str) -> set:
    """Parse a comma-separated string of integer ids, ignoring blank entries."""
    return {int(x) for x in text.split(",") if x.strip()}


def _read_id_mask(seg_dir: Path, stem: str):
    """Read the raw mask image belonging to frame ``stem``.

    Lookup order: ``<stem>_mask.tiff``, ``<stem>_mask.png``, then the legacy
    ``<stem>_ids.tiff``. Returns ``None`` when no such file exists or when the
    first existing candidate cannot be decoded.
    """
    mask_tiff = seg_dir / f"{stem}_mask.tiff"
    if mask_tiff.exists():
        return cv2.imread(str(mask_tiff), cv2.IMREAD_UNCHANGED)
    mask_png = seg_dir / f"{stem}_mask.png"
    if mask_png.exists():
        return cv2.imread(str(mask_png), cv2.IMREAD_UNCHANGED)
    legacy = seg_dir / f"{stem}_ids.tiff"
    if legacy.exists():
        return cv2.imread(str(legacy), cv2.IMREAD_UNCHANGED)
    return None


def main():
    """Convert a directory of frames and id masks into a YOLO-OBB dataset.

    Command-line options:
        --seg-dir: Input directory with frames, masks and legend files.
        --out-dir: Output directory; ``images/`` and ``labels/`` are created.
        --min-pixels: Minimum pixel count for an object to be kept (per image).
        --keep-ids: Comma-separated ids to keep; empty keeps all.
        --axis-aligned-ids: Comma-separated ids that get upright boxes.

    The work is done in two passes: the first scans every mask to collect the
    ids that occur (and so fixes the class index order), the second writes one
    label file per frame and copies the frame image. ``classes.txt`` and
    ``data.yaml`` are written to the output directory.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--seg-dir", required=True)
    ap.add_argument("--out-dir", required=True)
    ap.add_argument("--min-pixels", type=int, default=200)
    ap.add_argument(
        "--keep-ids",
        type=str,
        default="",
        help="Comma-separated list of ids to keep (e.g. '7'). Empty = keep all.",
    )
    ap.add_argument(
        "--axis-aligned-ids",
        type=str,
        default="",
        help="Comma-separated ids that should get axis-aligned (upright) boxes "
        "instead of minAreaRect. Still emitted in OBB format.",
    )
    args = ap.parse_args()

    # None means "no filter"; an empty option string must not become an empty
    # set, which would filter out everything.
    keep_ids = _parse_id_set(args.keep_ids) if args.keep_ids else None
    axis_aligned_ids = _parse_id_set(args.axis_aligned_ids)

    seg_dir = Path(os.path.expanduser(args.seg_dir))
    # Expand "~" here as well so both directory options behave the same.
    out_dir = Path(os.path.expanduser(args.out_dir))
    (out_dir / "images").mkdir(parents=True, exist_ok=True)
    (out_dir / "labels").mkdir(parents=True, exist_ok=True)

    id2label = load_id_to_label(seg_dir)
    legend_colors = load_legend_colors(seg_dir)

    # ---- Pass 1: discover which ids occur with enough pixels ----
    present_ids = set()
    mask_files = sorted(seg_dir.glob("*_mask.*"))
    if not mask_files:
        mask_files = sorted(seg_dir.glob("*_ids.tiff"))

    total_masks = len(mask_files)
    print(f"[1/2] Scanning {total_masks} masks to discover class ids...")
    for i, mf in enumerate(mask_files, 1):
        if i % 50 == 0 or i == total_masks:
            print(f" scanned {i}/{total_masks}")
        ids_raw = cv2.imread(str(mf), cv2.IMREAD_UNCHANGED)
        if ids_raw is None:
            continue
        ids_map = convert_mask_image_to_ids(ids_raw, legend_colors)
        if ids_map is None:
            continue
        uniq, counts = np.unique(ids_map, return_counts=True)
        for obj_id, cnt in zip(uniq, counts):
            obj_id = int(obj_id)
            if obj_id in (0, 65534):  # background / unknown are never classes
                continue
            if keep_ids is not None and obj_id not in keep_ids:
                continue
            if int(cnt) >= args.min_pixels:
                present_ids.add(obj_id)

    # Class index = position of the id in sorted order.
    class_ids = sorted(present_ids)
    classes = [id2label.get(obj_id, f"id_{obj_id}") for obj_id in class_ids]
    (out_dir / "classes.txt").write_text("\n".join(classes), encoding="utf-8")
    id2idx = {cid: i for i, cid in enumerate(class_ids)}

    # ---- Pass 2: write labels and copy frames ----
    # "<stem>_mask.png" also matches both globs; drop those so PNG masks are
    # not processed as frames (spurious warnings, inflated totals).
    front_files = sorted(
        p for p in seg_dir.glob("frame_*.png") if not p.stem.endswith("_mask")
    )
    if not front_files:
        front_files = sorted(
            p for p in seg_dir.glob("*.png") if not p.stem.endswith("_mask")
        )

    total_frames = len(front_files)
    print(f"[2/2] Converting {total_frames} frames to YOLO-OBB labels...")
    for i, cpath in enumerate(front_files, 1):
        if i % 50 == 0 or i == total_frames:
            print(f" converted {i}/{total_frames}")
        # The frame is decoded only to verify that it is readable.
        img = cv2.imread(str(cpath), cv2.IMREAD_COLOR)
        stem = cpath.stem
        ids_raw = _read_id_mask(seg_dir, stem)

        if ids_raw is None or img is None:
            print(f"WARNING: Skipping {cpath.name} (failed to read front or mask)")
            continue

        ids = convert_mask_image_to_ids(ids_raw, legend_colors)
        if ids is None:
            continue

        h, w = ids.shape[:2]
        uniq, counts = np.unique(ids, return_counts=True)

        lines = []
        for obj_id, count in zip(uniq, counts):
            obj_id = int(obj_id)
            if obj_id in (0, 65534):
                continue
            if count < args.min_pixels:
                continue
            if obj_id not in id2idx:
                continue
            mask = ids == obj_id
            line = obb_line_from_mask(
                mask, w, h, id2idx[obj_id], axis_aligned=obj_id in axis_aligned_ids
            )
            if line is not None:
                lines.append(line)

        label_path = out_dir / "labels" / (stem + ".txt")
        label_path.write_text("\n".join(lines), encoding="utf-8")

        try:
            shutil.copy2(cpath, out_dir / "images" / cpath.name)
        except OSError as e:
            # Still best-effort, but visible: a label without its image would
            # otherwise go unnoticed because debug messages are hidden by default.
            logging.warning("Failed to copy image %s: %s", cpath, e)

    yaml = [
        f"path: {out_dir.resolve()}",
        "train: images",
        "val: images",
        f"names: {classes}",
    ]
    (out_dir / "data.yaml").write_text("\n".join(yaml), encoding="utf-8")
    print(f"Done. Exported to: {out_dir}")


if __name__ == "__main__":
    main()
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from launch import LaunchDescription
2+
from launch.actions import DeclareLaunchArgument
3+
from launch.substitutions import LaunchConfiguration
4+
from launch_ros.actions import Node
5+
6+
7+
def generate_launch_description() -> LaunchDescription:
    """Describe the launch of the ``valve_randomizer`` node.

    Declares the launch arguments ``period``, ``min_angle`` and ``max_angle``
    (defaults ``"5.0"``, ``"-1.57"`` and ``"1.57"``) and forwards them as node
    parameters together with a fixed ``valves`` list.
    """
    launch_arguments = [
        DeclareLaunchArgument("period", default_value="5.0"),
        DeclareLaunchArgument("min_angle", default_value="-1.57"),
        DeclareLaunchArgument("max_angle", default_value="1.57"),
    ]

    # Parameter values are resolved from the launch arguments at launch time.
    node_parameters = {
        "valves": ["valve1", "valve2"],
        "period": LaunchConfiguration("period"),
        "min_angle": LaunchConfiguration("min_angle"),
        "max_angle": LaunchConfiguration("max_angle"),
    }

    randomizer_node = Node(
        package="valve_randomizer",
        executable="valve_randomizer_node",
        name="valve_randomizer",
        output="screen",
        parameters=[node_parameters],
    )

    return LaunchDescription([*launch_arguments, randomizer_node])

0 commit comments

Comments
 (0)