From 7841e812ca371f3640615df5e6511fbcc54d967d Mon Sep 17 00:00:00 2001 From: DinethraDivanjana2001 <143680271+DinethraDivanjana2001@users.noreply.github.com> Date: Tue, 26 May 2026 11:52:03 +0530 Subject: [PATCH] feat: update Jetson Orin Nano workspace - complete implementation with evaluation framework, ROS2 nodes, BT integration, docs --- .../Evaluation_V_I_ws/COLLECTION_GUIDE.md | 149 ++++ .../Evaluation_V_I_ws/collect_dataset.py | 577 ++++++++++++++ .../Evaluation_V_I_ws/collect_yolo_dataset.py | 436 +++++++++++ .../Evaluation_V_I_ws/data_collect.md | 454 +++++++++++ .../Evaluation_V_I_ws/evaluation_plan.md | 721 ++++++++++++++++++ .../Evaluation_V_I_ws/laptop_receiver.py | 133 ++++ .../scripts/evaluate_gauge.py | 148 ++++ .../scripts/evaluate_ibvs.py | 130 ++++ .../scripts/evaluate_image_quality.py | 161 ++++ .../Evaluation_V_I_ws/scripts/evaluate_vlm.py | 184 +++++ .../scripts/evaluate_vlm_llm_judge.py | 138 ++++ .../scripts/requirements_eval.txt | 13 + .../Evaluation_V_I_ws/setup_eval_folders.sh | 47 ++ .../Evaluation_V_I_ws/sync_from_jetson.sh | 62 ++ .../README.md | 8 +- .../docs/DEMO_COMMANDS.md | 370 +++++++++ .../docs/JETSON_WORKSPACE_OVERVIEW.md | 137 ++++ .../docs/WEEKLY_REPORT_WEEK3.md | 239 ++++++ .../docs/WEEKLY_REPORT_WEEK4.md | 289 +++++++ .../docs/WEEKLY_REPORT_WEEK5.md | 262 +++++++ .../inspection_ws/PLAN.md | 272 +++++++ .../inspection_ws/README.md | 206 ++--- .../CMakeLists.txt | 2 + .../action/InspectObjects.action | 3 + .../srv/Inspect.srv | 33 + .../srv/UploadImages.srv | 10 + .../docs/BT_INTEGRATION_GUIDE.md | 434 +++++++++++ .../visual_inspection_ros/setup.py | 11 +- .../ibvs_action_server.py | 515 +++++++++++-- .../visual_inspection_ros/image_uploader.py | 151 ++++ .../inspection_service.py | 478 ++++++++++++ .../integrated_pipeline.py | 0 .../docs/OPTIMIZATION_PLAN.md | 437 +++++++++++ .../jetson_workspace/ibvs_pipeline.py | 2 +- .../jetson_workspace/run.sh | 0 .../tests/04_test_arduino_serial.py | 100 +++ .../tests/fix_ch340_permanent.sh | 121 +++ .../tests/wasd_servo_control.py | 304 ++++++++ .../single_camera_pipeline.py | 0 .../test_full_pipeline.py | 0 .../test_scripts/00_check_devices.sh | 0 .../test_scripts/01_test_cameras.py | 0 .../test_scripts/02_yolo_detection_auto.py | 0 .../test_scripts/02_yolo_detection_cpu.py | 0 .../test_scripts/03_export_to_tensorrt.py | 0 .../04_yolo_detection_tensorrt.py | 0 .../test_scripts/05_hemisphere_mapping.py | 0 .../05_yolo_detection_onnx_gpu.py | 0 .../test_scripts/06_arduino_test.py | 0 .../test_scripts/07_detection_confirmation.py | 0 .../test_scripts/test_bt_full_flow.py | 198 +++++ .../test_scripts/test_full_pipeline.py | 47 +- .../test_scripts/test_inspection_service.py | 76 ++ .../tools/detect_cameras.py | 0 .../tools/ibvs_pipeline.py | 2 +- .../tools/quick_camera_test.py | 0 .../visual_pipeline.py | 0 57 files changed, 7894 insertions(+), 166 deletions(-) create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/COLLECTION_GUIDE.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/collect_dataset.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/collect_yolo_dataset.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/data_collect.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/evaluation_plan.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/laptop_receiver.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_gauge.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_ibvs.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_image_quality.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_vlm.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_vlm_llm_judge.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/requirements_eval.txt create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/setup_eval_folders.sh create mode 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/sync_from_jetson.sh create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/DEMO_COMMANDS.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/JETSON_WORKSPACE_OVERVIEW.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK3.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK4.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK5.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/PLAN.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/srv/Inspect.srv create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/srv/UploadImages.srv create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/docs/BT_INTEGRATION_GUIDE.md create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/image_uploader.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/inspection_service.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/integrated_pipeline.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/docs/OPTIMIZATION_PLAN.md mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/run.sh create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/04_test_arduino_serial.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/fix_ch340_permanent.sh create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/wasd_servo_control.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/single_camera_pipeline.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_full_pipeline.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/00_check_devices.sh mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/01_test_cameras.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/02_yolo_detection_auto.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/02_yolo_detection_cpu.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/03_export_to_tensorrt.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/04_yolo_detection_tensorrt.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/05_hemisphere_mapping.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/05_yolo_detection_onnx_gpu.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/06_arduino_test.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/07_detection_confirmation.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_bt_full_flow.py create mode 100644 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_inspection_service.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/detect_cameras.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/quick_camera_test.py mode change 100644 => 100755 Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/visual_pipeline.py diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/COLLECTION_GUIDE.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/COLLECTION_GUIDE.md new file mode 100644 index 00000000..66d2afb3 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/COLLECTION_GUIDE.md @@ -0,0 +1,149 @@ +# Data Collection Guide + +--- + +## Terminal Commands + +### Start Pipeline (every session) + +```bash +# T1 +source /opt/ros/humble/setup.bash && source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +ros2 run visual_inspection_ros camera_node + +# T2 +source /opt/ros/humble/setup.bash && source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +ros2 run visual_inspection_ros servo_node + +# T3 +source /opt/ros/humble/setup.bash && source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +ros2 run visual_inspection_ros ibvs_action_server + +# T4 — collection script +python3 ~/Documents/Visual_Inspection_ws/evaluation/collect_dataset.py +``` + +### ROS Topic Monitor Commands + +```bash +# Check all active topics +ros2 topic list + +# Watch camera feed is running (should publish at 30Hz) +ros2 topic hz /visual_inspection/insta360/image_raw + +# Watch Logitech camera +ros2 topic hz /visual_inspection/logitech/image_raw + +# Watch YOLO detections +ros2 topic echo /visual_inspection/detections + +# Watch current system status +ros2 topic echo /visual_inspection/status + +# Watch IBVS error live +ros2 topic echo /visual_inspection/ibvs_error + +# Check action server is ready +ros2 action list + +# Open RViz to see camera feed +rviz2 +# Then: Add → By Topic → /visual_inspection/debug → Image +``` + +### Check Collected Images + +```bash +# Count all collected images +find ~/Documents/Visual_Inspection_ws/evaluation/ -name "*.jpg" | wc -l + +# See folder breakdown +find ~/Documents/Visual_Inspection_ws/evaluation/ -name "*.jpg" | \ + sed 's|/[^/]*\.jpg||' | sort | uniq -c | sort -rn + +# Check latest capture +ls -lt ~/Documents/Visual_Inspection_ws/captures/inspection/ | head -5 + +# View capture log +cat ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv +``` + +### SCP Images to Laptop + +```bash +# Run on LAPTOP after each session +scp -r rgen@192.168.8.181:~/Documents/Visual_Inspection_ws/evaluation/ \ + /home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws/eval_dataset/ + +# Or just the log +scp rgen@192.168.8.181:~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv \ + /home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws/eval_dataset/capture_log.csv +``` + +--- + +## Script Features + +Press `s` in the main menu to see a **progress bar** showing how many images in each folder. + +All sessions let you: +- Type **any angle** (0 to 180°, any direction) +- Type **any exact distance** you measured (e.g. 1.2m, 2.4m) +- Type **how many images** you want at that position +- Type **any occlusion %** you applied +- Decide when you're done — `q` goes back to menu, `y` continues + +--- + +## What to Collect Per Session + +### Session 1 — Reference +- Pick object → robot at 1m, 0° → press ENTER +- 5 images per object (fire_ext, gauge, door) + +### Session 2 — Angle +- Pick ANY angle (0°, 15°, 30°, 45°, 60°, 90° etc.) +- Pick L or R direction (covers full 180°) +- Type actual measured distance (can vary ±20cm per position) +- 10 images per position +- Keep going: change angle → type new angle → more images + +### Session 3 — Distance +- Type exact measured distance (0.5m, 1.0m, 1.5m, 2.0m, 2.5m, 3.0m, 4.0m) +- 10 images per distance +- Keep going: new distance → more images + +### Session 4 — Gauge Ground Truth +- Type true reading → 3 images → type next reading → repeat +- Skip if no gauge equipment (q to exit) + +### Session 5 — VLM Images +- Pick scenario (fire_ext pass/fail, exit pass/fail, door, cylinder) +- Type caption after each capture (1 sentence) +- 10 images per scenario + +### Session 6 — Occlusion +- Type exact % you covered (0, 10, 25, 50, 75, 90...) +- 10 images per level + +### Session 7 — Multi-object +- Pick scene type → 5 images per scene + +--- + +## Checklist + +``` +[ ] Reference: fire_ext (5) gauge (5) door (5) +[ ] Angle: 0° 15°L 15°R 30°L 30°R 45°L 45°R (10 each) +[ ] Angle extra: 60°L 60°R 90°L 90°R (10 each, if time) +[ ] Angle vertical: up down (5-10 each) +[ ] Distance: 1m 1.5m 2m 2.5m 3m 3.5m 4m (10 each) +[ ] Gauge GT: 5 reading positions × 3 (skip if no equipment) +[ ] VLM: fire_ext pass (10) fail (10) +[ ] VLM: exit pass (10) fail (10) +[ ] VLM: door pass (10) +[ ] Occlusion: 0% 25% 50% 75% (10 each) +[ ] Multi-object: 2 same class, mixed class (5 each) +``` diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/collect_dataset.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/collect_dataset.py new file mode 100644 index 00000000..e4789b5a --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/collect_dataset.py @@ -0,0 +1,577 @@ +#!/usr/bin/env python3 +""" +collect_dataset.py — Visual Inspection Dataset Collector +Fully flexible: you enter angle, distance, occlusion, n_images yourself. + +python3 ~/Documents/Visual_Inspection_ws/evaluation/collect_dataset.py +""" + +import os, sys, re, time, csv, shutil, subprocess +from pathlib import Path +from datetime import datetime + +# ── Paths ──────────────────────────────────────────────────────────────────── +BASE = Path.home() / 'Documents/Visual_Inspection_ws' +EVAL = BASE / 'evaluation' +CAPTURES = BASE / 'captures/inspection' +LOG_CSV = EVAL / 'capture_log.csv' +SVC_TEST = BASE / 'test_scripts/test_inspection_service.py' + +# ── Colors ─────────────────────────────────────────────────────────────────── +R='\033[1;31m'; G='\033[1;32m'; Y='\033[1;33m' +C='\033[1;36m'; W='\033[1m'; X='\033[0m' + +def clr(): os.system('clear') +def hdr(t): print(f'\n{C}{"═"*58}\n {t}\n{"═"*58}{X}') +def ok(t): print(f'{G} ✓ {t}{X}') +def bad(t): print(f'{R} ✗ {t}{X}') +def info(t): print(f'{Y} → {t}{X}') +def div(): print(' ' + '─'*54) +def prompt(q, d=''): + hint = f' [{d}]' if d else '' + v = input(f' {W}{q}{hint}: {X}').strip() + return v if v else d + +# ── CSV ────────────────────────────────────────────────────────────────────── +COLS = ['timestamp','folder','filename','object_type','distance_m', + 'angle_deg','angle_direction','occlusion_pct','n_objects', + 'ibvs_time_s','ibvs_fps','initial_error_px','final_error_px','converged', + 'coarse_time_s','pipeline_time_s', + 'detection_confidence','objects_inspected', + 'ground_truth_value','notes'] + +def init_log(): + EVAL.mkdir(parents=True, exist_ok=True) + if not LOG_CSV.exists(): + with open(LOG_CSV,'w',newline='') as f: + csv.writer(f).writerow(COLS) + +def log_row(**kw): + kw.setdefault('timestamp', datetime.now().strftime('%Y-%m-%d_%H:%M:%S')) + with open(LOG_CSV,'a',newline='') as f: + csv.DictWriter(f, fieldnames=COLS).writerow(kw) + +def count_rows(): + if not LOG_CSV.exists(): return 0 + return max(0, sum(1 for _ in open(LOG_CSV)) - 1) + +def count_in(subfolder): + d = EVAL / subfolder + if not d.exists(): return 0 + return len(list(d.glob('*.jpg'))) + +# ── Service call ──────────────────────────────────────────────────────────────────── + +def run_and_parse(target_object: str = '', location_label: str = 'eval'): + """ + Call /visual_inspection/inspect via test_inspection_service.py. + After the run, reads metrics from metadata.json saved by the service. + Returns (img_path, ibvs_time, ibvs_err, converged, objects_inspected, status, conf). + """ + before_meta = set() + before_imgs = set() + if CAPTURES.exists(): + before_meta = {f for f in CAPTURES.rglob('metadata.json')} + before_imgs = {f for f in CAPTURES.rglob('img_*.jpg')} + + objects_inspected = 0 + status = 'unknown' + info(f'Calling service: object="{target_object or "any"}" loc="{location_label}"') + + src = (f'source /opt/ros/humble/setup.bash && ' + f'source {BASE}/inspection_ws/install/setup.bash') + obj_flag = f'--object "{target_object}"' if target_object else '' + loc_flag = f'--location "{location_label}"' + cmd = f'bash -c "{src} && python3 {SVC_TEST} {obj_flag} {loc_flag}"' + + proc = None + try: + proc = subprocess.Popen(cmd, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + text=True) + for line in proc.stdout: + l = line.strip() + if l: print(f' {l}') + m2 = re.search(r'objects_inspected\s*:\s*(\d+)', l) + if m2: objects_inspected = int(m2.group(1)) + m3 = re.search(r'status\s*:\s*(\S+)', l) + if m3: status = m3.group(1) + proc.wait(timeout=180) + except Exception as e: + bad(f'Service call error: {e}') + finally: + if proc: + try: proc.kill(); proc.wait(timeout=3) + except Exception: pass + + time.sleep(2) + + # ── Read metrics from metadata.json (written by inspection_service) ──────── + import json as _json + ibvs_time, ibvs_err, converged, conf = 0.0, 0.0, False, 0.0 + ibvs_fps, coarse_time, pipeline_time = 0.0, 0.0, 0.0 + initial_err = 0.0 + new_imgs = [] + + if CAPTURES.exists(): + after_meta = {f for f in CAPTURES.rglob('metadata.json')} + after_imgs = {f for f in CAPTURES.rglob('img_*.jpg')} + new_meta = after_meta - before_meta + new_imgs = sorted(after_imgs - before_imgs, key=lambda f: f.stat().st_mtime) + + if new_meta: + meta_f = max(new_meta, key=lambda f: f.stat().st_mtime) + try: + md = _json.loads(meta_f.read_text()) + conf = float(md.get('confidence', 0.0)) + converged = bool( md.get('ibvs_converged', False)) + ibvs_err = float(md.get('ibvs_error_px', 0.0)) + ibvs_time = float(md.get('ibvs_time_s', 0.0)) + ibvs_fps = float(md.get('ibvs_fps', 0.0)) + coarse_time = float(md.get('coarse_time_s', 0.0)) + pipeline_time = float(md.get('pipeline_time_s', 0.0)) + initial_err = float(md.get('initial_ibvs_error_px',0.0)) + except Exception as e: + bad(f'metadata.json parse error: {e}') + + img_path = new_imgs[0] if new_imgs else None + if new_imgs: + ok(f'conf={conf:.3f} | coarse_err={initial_err:.1f}px | ' + f'ibvs={ibvs_time:.2f}s | final_err={ibvs_err:.1f}px | ' + f'fps={ibvs_fps} | pipeline={pipeline_time:.1f}s | converged={converged}') + else: + bad('No new images — check inspection_service is running (T3)') + + return img_path, ibvs_time, ibvs_err, converged, objects_inspected, status, conf, ibvs_fps, coarse_time, pipeline_time, initial_err + + + + +def save_img(img_path, dest: Path, fname: str) -> bool: + dest.mkdir(parents=True, exist_ok=True) + if img_path and img_path.exists(): + shutil.copy2(img_path, dest / fname) + ok(f'Saved → {(dest/fname).relative_to(EVAL)}') + return True + bad('No new image detected — check pipeline') + return False + +def next_n(folder: Path, prefix='img_') -> int: + existing = list(folder.glob(f'{prefix}*.jpg')) + nums = [] + for f in existing: + m = re.search(r'(\d+)', f.stem.replace(prefix,'')) + if m: nums.append(int(m.group(1))) + return max(nums)+1 if nums else 1 + +# ── Single capture ──────────────────────────────────────────────────────────── +def one_capture(dest: Path, fname_prefix: str, subfolder_name: str, + obj_type: str, distance: str, angle_deg: str, angle_dir: str, + occlusion: str, n_objects: str='1', + ground_truth: str='N/A', ask_caption: bool=False) -> bool: + n = next_n(dest, prefix=fname_prefix) + fname = f'{fname_prefix}{n:02d}.jpg' + + input(f'\n {W}▶ Ready? Press ENTER to capture...{X}') + print() + img, ibvs_t, err_px, conv, n_insp, svc_status, det_conf, fps, coarse_t, pipe_t, init_err = run_and_parse( + target_object=obj_type, location_label=subfolder_name) + saved = save_img(img, dest, fname) + + if conv: + ok(f'IBVS: {ibvs_t:.2f}s | coarse={init_err:.1f}px→final={err_px:.1f}px | fps={fps} | pipeline={pipe_t:.1f}s | conf={det_conf:.3f}') + elif n_insp > 0: + ok(f'Inspected {n_insp} | status={svc_status} | conf={det_conf:.3f}') + else: + bad(f'IBVS: DID NOT CONVERGE (status={svc_status})') + + caption = '' + if ask_caption: + caption = input(f' {Y}Caption — describe exactly what camera sees: {X}').strip() + + log_row(folder=subfolder_name, filename=fname, object_type=obj_type, + distance_m=distance, angle_deg=angle_deg, angle_direction=angle_dir, + occlusion_pct=occlusion, n_objects=n_objects, + ibvs_time_s=round(ibvs_t,3), ibvs_fps=fps, + initial_error_px=round(init_err,2), final_error_px=round(err_px,2), + converged=conv, coarse_time_s=round(coarse_t,3), pipeline_time_s=round(pipe_t,2), + detection_confidence=round(det_conf,4), objects_inspected=n_insp, + ground_truth_value=ground_truth, notes=caption) + ok(f'Logged ({count_rows()} total so far)') + return saved + +# ── Sessions ────────────────────────────────────────────────────────────────── + +def ask_obj(): + print(f''' + Objects (YOLO classes + overview-only): + 1 fire_extinguisher (conf ≥ 0.5, full IBVS) + 2 gauge (conf ≥ 0.3, IBVS + sweep) + 3 door (conf ≥ 0.5, full IBVS) + 4 person (conf ≥ 0.5, full IBVS) + 5 unknown (overview only — no IBVS) + 6 main_cylinder (overview only — no IBVS)''') + m = {'1':'fire_extinguisher','2':'gauge','3':'door', + '4':'person','5':'unknown','6':'main_cylinder'} + return m.get(prompt('Select object','1'), 'fire_extinguisher') + +def session_reference(): + hdr('SESSION 1 — REFERENCE IMAGES') + print(f''' + Gold-standard baseline for all quality metrics. + SETUP: robot at 1m, directly facing object, good lighting. + Collect 5+ images per object type. +''') + obj = ask_obj() + + while True: + dist = prompt('Exact distance you measured (m)', '1.0') + n = int(prompt('How many images at this position','5')) + prefix = obj.split('_')[0]+'_ref_' + dest = EVAL / 'reference' + info(f'{n} × {obj} at {dist}m → evaluation/reference/') + + for i in range(n): + div() + print(f' {C}Capture {i+1}/{n}{X}') + one_capture(dest, prefix, 'reference', obj, dist, '0','center','0') + + print(f'\n {Y}Images in reference/: {count_in("reference")}{X}') + cont = prompt('Continue reference? (y=same obj / n=new obj / q=menu','y').lower() + if cont == 'q': break + if cont == 'n': obj = ask_obj() + +def session_angle(): + hdr('SESSION 2 — ANGLE EVALUATION') + print(f''' + Covers full 180° horizontal + vertical angles. + Object: fire_extinguisher (fixed) — only robot moves. + + HOW TO ENTER ANGLES: + Horizontal: enter degrees (e.g. 15, 30, 45, 60, 90, 135) + direction: L (left) or R (right) + Vertical: enter degrees (e.g. 15, 30) + direction: up or down + Head-on: enter 0 + + DISTANCE RANGE: Enter the actual measured distance each time. + e.g. 2.0m then 2.2m then 1.8m to cover ±20cm variation. +''') + obj = ask_obj() + + while True: + angle_in = prompt('Angle in degrees (e.g. 0, 15, 30, 45, 90)','0') + angle_dir = prompt('Direction (L / R / up / down / center)','center').lower() + dist = prompt('Exact measured distance (m)','2.0') + n = int(prompt('How many images at this position','10')) + + # Build folder name + if angle_in == '0' or angle_dir == 'center': + sub_angle = '0deg' + folder = 'angle_eval/horizontal/0deg' + elif angle_dir in ('up','down'): + sub_angle = f'{angle_in}deg_{angle_dir}' + folder = f'angle_eval/vertical/{sub_angle}' + else: + sub_angle = f'{angle_in}deg_{angle_dir.upper()}' + folder = f'angle_eval/horizontal/{sub_angle}' + + dest = EVAL / folder + info(f'{n} images | angle={sub_angle} | dist={dist}m | {folder}/') + print(f' {Y}Position robot at {dist}m, {sub_angle} offset from object.{X}') + + for i in range(n): + div() + print(f' {C}Capture {i+1}/{n} | {sub_angle} | {dist}m{X}') + one_capture(dest, 'img_', folder, obj, dist, + angle_in, angle_dir, '0') + + print(f'\n {Y}Images in {folder}/: {count_in(folder)}{X}') + cont = prompt('Next? (y=new angle / q=menu','y').lower() + if cont == 'q': break + +def session_distance(): + hdr('SESSION 3 — DISTANCE EVALUATION') + print(f''' + Find image quality vs distance. + Object: gauge (best for testing OCR at far distance) + Angle: 0° head-on (fixed) + + Measure the actual distance and type it. + Suggested: 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0 m +''') + obj = ask_obj() + + while True: + dist = prompt('Exact measured distance (m)','2.0') + n = int(prompt('How many images','10')) + + # Snap to nearest folder bracket + d = float(dist) + if d <= 1.25: folder = 'distance_eval/1m' + elif d <= 1.75: folder = 'distance_eval/1.5m' + elif d <= 2.25: folder = 'distance_eval/2m' + elif d <= 2.75: folder = 'distance_eval/2.5m' + elif d <= 3.25: folder = 'distance_eval/3m' + elif d <= 3.75: folder = 'distance_eval/3.5m' + else: folder = 'distance_eval/4m' + + # Create folder with actual measured value in name + folder = f'distance_eval/{dist}m' + dest = EVAL / folder + info(f'{n} images at {dist}m → evaluation/{folder}/') + + for i in range(n): + div() + print(f' {C}Capture {i+1}/{n} | distance={dist}m | 0° head-on{X}') + one_capture(dest, 'img_', folder, obj, dist, '0','center','0') + + print(f'\n {Y}Images in {folder}/: {count_in(folder)}{X}') + cont = prompt('Next distance? (y / q=menu','y').lower() + if cont == 'q': break + +def session_gauge(): + hdr('SESSION 4 — GAUGE ACCURACY GROUND TRUTH') + print(f''' + Collect images at KNOWN gauge readings for MAE/RMSE evaluation. + You need physical equipment (pressure gauge / water gauge). + SKIP with q if no gauge available. + + Setup: manually set pointer to a known value, tape it in place. + Distance: 2m | Angle: 0° +''') + while True: + true_val = prompt('True gauge reading (e.g. 2.5) or q to exit','') + if true_val.lower() == 'q': break + dist = prompt('Exact measured distance (m)','2.0') + n = int(prompt('How many images at this reading','3')) + folder = 'gauge_accuracy' + dest = EVAL / folder + info(f'{n} images | gauge={true_val} | {dist}m') + + for i in range(n): + div() + print(f' {C}Capture {i+1}/{n} | gauge reading = {true_val}{X}') + fname = f'gauge_{true_val}_{dist}m_n{next_n(dest, f"gauge_{true_val}_")}.jpg' + # use direct save + input(f'\n {W}▶ Ready? Press ENTER to capture...{X}') + print() + img, ibvs_t, err_px, conv, _, _, det_conf, fps, coarse_t, pipe_t, init_err = run_and_parse( + target_object='gauge', location_label=folder) + if save_img(img, dest, fname): + if conv: ok(f'IBVS {ibvs_t:.2f}s | coarse={init_err:.1f}px→final={err_px:.1f}px | fps={fps} | conf={det_conf:.3f}') + else: bad('IBVS timeout') + log_row(folder=folder, filename=fname, object_type='gauge', + distance_m=dist, angle_deg='0', angle_direction='center', + occlusion_pct='0', n_objects='1', + ibvs_time_s=round(ibvs_t,3), ibvs_fps=fps, + initial_error_px=round(init_err,2), final_error_px=round(err_px,2), + converged=conv, coarse_time_s=round(coarse_t,3), pipeline_time_s=round(pipe_t,2), + detection_confidence=round(det_conf,4), objects_inspected=1, + ground_truth_value=true_val, notes=f'gauge={true_val}') + ok(f'Logged ({count_rows()} total)') + + + cont = prompt('Another reading? (y / q=menu','y').lower() + if cont == 'q': break + +def session_vlm(): + hdr('SESSION 5 — VLM PASS/FAIL IMAGES') + scenarios = { + '1': ('fire_ext_pass', 'fire_extinguisher', 'PASS', + 'Extinguisher on wall, NOTHING blocking it'), + '2': ('fire_ext_fail', 'fire_extinguisher', 'FAIL', + 'Place large box/chair DIRECTLY in front blocking access'), + '3': ('exit_pass', 'emergency_exit', 'PASS', + 'Door/exit with completely CLEAR walkway'), + '4': ('exit_fail', 'emergency_exit', 'FAIL', + 'Stack 2-3 chairs or boxes BLOCKING the door'), + '5': ('door_pass', 'door', 'PASS', + 'Regular door — open or closed, clear state'), + '6': ('door_fail', 'door', 'FAIL', + 'Door blocked by objects'), + '7': ('cylinder_pass', 'main_cylinder', 'PASS', + 'Machinery/cylinder — floor dry, no leak'), + '8': ('cylinder_fail', 'main_cylinder', 'FAIL', + 'Water on floor near cylinder (simulates oil leak)'), + } + print() + for k,(f,o,d,s) in scenarios.items(): + mark = G+'PASS'+X if d=='PASS' else R+'FAIL'+X + print(f' {k}: {f:20s} {mark} — {s}') + print() + + choice = prompt('Select scenario','1') + if choice not in scenarios: + bad('Invalid'); return + folder_name, obj_type, expected, setup = scenarios[choice] + subfolder = f'vlm_eval/{folder_name}' + dist = prompt('Distance (m)','2.0') + n = int(prompt('How many images','10')) + + print(f'\n {Y}SETUP: {setup}{X}') + input(' Set up scene, position robot. Press ENTER when ready...') + + dest = EVAL / subfolder + info(f'{n} images → evaluation/{subfolder}/') + print(f' {Y}Write 1-sentence caption after each capture.{X}\n') + + for i in range(n): + div() + print(f' {C}Capture {i+1}/{n} | {folder_name}{X}') + one_capture(dest, f'{folder_name}_', subfolder, obj_type, + dist, '0','center','0','1', expected, ask_caption=True) + + print(f'\n {Y}Images in {subfolder}/: {count_in(subfolder)}{X}') + cont = prompt('Collect more for this scenario? (y / q=menu','q').lower() + if cont == 'y': + n2 = int(prompt('How many more','5')) + for i in range(n2): + div() + print(f' {C}Extra capture {i+1}/{n2}{X}') + one_capture(dest, f'{folder_name}_', subfolder, obj_type, + dist,'0','center','0','1', expected, ask_caption=True) + +def session_occlusion(): + hdr('SESSION 6 — OCCLUSION EVALUATION') + print(f''' + How much occlusion can the system tolerate? + Object: fire_extinguisher | Distance: 2m | Angle: 0° + + Cover the object with tape/cardboard. + Enter the actual percentage you covered (0, 10, 25, 33, 50, 66, 75, 90...) +''') + obj = ask_obj() + + while True: + pct = prompt('Occlusion percentage you applied (e.g. 0, 25, 50, 75)','0') + dist = prompt('Exact measured distance (m)','2.0') + n = int(prompt('How many images','10')) + folder = f'occlusion/{pct}pct' + dest = EVAL / folder + + print(f'\n {Y}Apply {pct}% occlusion to object now.{X}') + input(' Press ENTER when ready...') + info(f'{n} images | occlusion={pct}% | dist={dist}m → evaluation/{folder}/') + + for i in range(n): + div() + print(f' {C}Capture {i+1}/{n} | occlusion={pct}%{X}') + one_capture(dest, 'img_', folder, obj, dist,'0','center', pct) + + print(f'\n {Y}Images in {folder}/: {count_in(folder)}{X}') + cont = prompt('Next occlusion level? (y / q=menu','y').lower() + if cont == 'q': break + +def session_multi(): + hdr('SESSION 7 — MULTI-OBJECT SCENES') + print(f''' + Test ByteTrack with multiple objects in view. + Scenes to try: + 1: 2 same-class side by side (2 fire extinguishers) + 2: Mixed class (1 gauge + 1 fire extinguisher) + 3: 1 front + 1 back (tests front/back zone detection) + 4: 3 objects across a panel +''') + scenes = { + '1':('2 fire_extinguishers side by side','2_objects','fire_extinguisher','2'), + '2':('1 gauge + 1 fire_ext','2_objects','mixed','2'), + '3':('1 front + 1 back zone','2_objects','fire_extinguisher','2'), + '4':('3 objects across panel','3_objects','gauge','3'), + } + for k,(d,f,o,n) in scenes.items(): print(f' {k}: {d}') + print() + choice = prompt('Select scene','1') + if choice not in scenes: bad('Invalid'); return + + desc, folder_key, obj, n_obj = scenes[choice] + folder = f'multi_object/{folder_key}' + dist = prompt('Distance (m)','2.0') + n = int(prompt('How many images','5')) + dest = EVAL / folder + + print(f'\n {Y}Set up: {desc}{X}') + input(' Press ENTER when ready...') + + for i in range(n): + div() + print(f' {C}Capture {i+1}/{n} | {desc}{X}') + one_capture(dest, 'img_', folder, obj, dist,'0','center','0', n_obj) + + print(f'\n {Y}Images in {folder}/: {count_in(folder)}{X}') + +def show_summary(): + hdr('CURRENT COLLECTION STATUS') + sections = [ + ('reference', 'Reference images'), + ('angle_eval/horizontal', 'Angle H — all'), + ('angle_eval/vertical', 'Angle V — all'), + ('distance_eval', 'Distance — all'), + ('gauge_accuracy', 'Gauge ground truth'), + ('vlm_eval/fire_ext_pass', 'VLM fire_ext PASS'), + ('vlm_eval/fire_ext_fail', 'VLM fire_ext FAIL'), + ('vlm_eval/exit_pass', 'VLM exit PASS'), + ('vlm_eval/exit_fail', 'VLM exit FAIL'), + ('vlm_eval/door_pass', 'VLM door PASS'), + ('occlusion', 'Occlusion — all'), + ('multi_object', 'Multi-object'), + ] + for path, label in sections: + d = EVAL / path + if d.exists(): + imgs = len(list(d.rglob('*.jpg'))) + bar = G if imgs >= 10 else (Y if imgs > 0 else R) + print(f' {bar}{"■"*min(imgs,30):30s}{X} {label:<28} {imgs} images') + else: + print(f' {R}{"□"*30}{X} {label:<28} 0 images') + print(f'\n Total logged: {count_rows()} rows in CSV') + +# ── Main menu ───────────────────────────────────────────────────────────────── + +def main(): + init_log() + sessions = { + '1': ('Reference images', session_reference), + '2': ('Angle evaluation (any angle)', session_angle), + '3': ('Distance evaluation', session_distance), + '4': ('Gauge ground truth', session_gauge), + '5': ('VLM PASS/FAIL images', session_vlm), + '6': ('Occlusion evaluation', session_occlusion), + '7': ('Multi-object scenes', session_multi), + 's': ('Show collection status', show_summary), + } + + while True: + clr() + hdr('VISUAL INSPECTION — DATASET COLLECTION') + print(f''' + {W}MAKE SURE T1, T2, T3 ARE RUNNING FIRST:{X} + T1: ros2 run visual_inspection_ros camera_node + T2: ros2 run visual_inspection_ros servo_node + T3: ros2 run visual_inspection_ros ibvs_action_server + + {Y}Total images logged: {count_rows()}{X} +''') + for k,(label,_) in sessions.items(): + print(f' {C}{k}{X}: {label}') + print(f' {C}q{X}: Quit') + print() + + choice = input(f' {W}▶ Select session: {X}').strip().lower() + if choice == 'q': + break + elif choice in sessions: + sessions[choice][1]() + input(f'\n{G} Done! Press ENTER to return to menu...{X}') + else: + bad('Invalid choice'); time.sleep(1) + + clr() + hdr('SESSION ENDED') + show_summary() + print(f'\n {W}SCP to laptop:{X}') + print(f' scp -r rgen@192.168.8.181:{EVAL} \\') + print(f' /home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws/eval_dataset/') + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/collect_yolo_dataset.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/collect_yolo_dataset.py new file mode 100644 index 00000000..74c2066d --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/collect_yolo_dataset.py @@ -0,0 +1,436 @@ +#!/usr/bin/env python3 +""" +collect_yolo_dataset.py — YOLO Training Data Collector +========================================================= +Run on Jetson (inside venv, no ROS needed). + + source ~/Documents/Visual_Inspection_ws/venv/bin/activate + cd ~/Documents/Visual_Inspection_ws + python3 collect_yolo_dataset.py + +Controls (in the camera window): + ← → Pan left / right + ↑ ↓ Tilt up / down + SPACE Capture both cameras simultaneously + c Change class + h Home (pan=90, tilt=90) + +/- Increase/decrease step size + q Quit + +Output: + yolo_dataset/ + fire_extinguisher/ + insta360/ img_0001.jpg img_0002.jpg ... + logitech/ img_0001.jpg img_0002.jpg ... + door/ + insta360/ ... + logitech/ ... + gauge/ + insta360/ ... + logitech/ ... +""" + +import cv2 +import serial +import time +import os +import glob +from pathlib import Path +from datetime import datetime + +# ── Paths ───────────────────────────────────────────────────────────────────── +BASE = Path.home() / 'Documents/Visual_Inspection_ws' +OUT_DIR = BASE / 'yolo_dataset' +ARDUINO = '/dev/ttyACM0' +BAUD = 9600 + +# ── Classes ─────────────────────────────────────────────────────────────────── +CLASSES = ['fire_extinguisher', 'door', 'gauge'] + +# ── Servo ───────────────────────────────────────────────────────────────────── +PAN_MIN, PAN_MAX = 0, 180 +TILT_MIN, TILT_MAX = 20, 160 +TILT_INVERT = True # flip if tilt direction is wrong (matches ROS action server) + +# ── Colors ──────────────────────────────────────────────────────────────────── +GREEN = (0, 255, 80) +YELLOW = (0, 220, 255) +RED = (0, 60, 255) +WHITE = (240, 240, 240) +CYAN = (255, 220, 0) +DARK = (30, 30, 30) + +# ───────────────────────────────────────────────────────────────────────────── +# Camera detection (3-layer from ibvs_pipeline.py) +# ───────────────────────────────────────────────────────────────────────────── +VENDOR_MAP = { + 'Insta360': ('2e1a', '/dev/insta360'), + 'HD Pro Webcam': ('046d', '/dev/logitech'), + 'Logitech': ('046d', '/dev/logitech'), +} + +def find_camera(name_pattern): + vendor_id, udev_path = None, None + for key, (vid, udev) in VENDOR_MAP.items(): + if key in name_pattern or name_pattern in key: + vendor_id, udev_path = vid, udev + break + + # Layer 1: udev symlink (e.g. /dev/insta360, /dev/logitech) + if udev_path and os.path.exists(udev_path): + cap = cv2.VideoCapture(udev_path) + if cap.isOpened(): + ret, frame = cap.read() + cap.release() + if ret and frame is not None and frame.size > 0: + idx = int(os.path.realpath(udev_path).replace('/dev/video', '')) + return idx + + # Layer 2: vendor ID via sysfs — open by PATH STRING (integer index fails on Jetson) + if vendor_id: + for path in sorted(glob.glob('/sys/class/video4linux/video*')): + try: + check = os.path.realpath(path) + for _ in range(8): + vid_file = os.path.join(check, 'idVendor') + if os.path.exists(vid_file): + with open(vid_file) as f: + if f.read().strip() == vendor_id: + idx = int(os.path.basename(path).replace('video', '')) + dev_path = f'/dev/video{idx}' + cap = cv2.VideoCapture(dev_path) # path string, not int + if cap.isOpened(): + ret, frame = cap.read() + cap.release() + if ret and frame is not None and frame.size > 0: + return idx + break + check = os.path.dirname(check) + except: + pass + + # Layer 3: brute-force try /dev/video0 .. /dev/video9 matching device name + for idx in range(10): + dev_path = f'/dev/video{idx}' + if not os.path.exists(dev_path): + continue + try: + name_file = f'/sys/class/video4linux/video{idx}/name' + if os.path.exists(name_file): + with open(name_file) as f: + dev_name = f.read().strip() + if name_pattern.split()[0].lower() in dev_name.lower(): + cap = cv2.VideoCapture(dev_path) + if cap.isOpened(): + ret, frame = cap.read() + cap.release() + if ret and frame is not None and frame.size > 0: + return idx + except: + pass + + return -1 + +# ───────────────────────────────────────────────────────────────────────────── +# Arduino controller +# ───────────────────────────────────────────────────────────────────────────── +class Arduino: + def __init__(self, port=ARDUINO, baud=BAUD): + self.ser = None + try: + self.ser = serial.Serial(port, baud, timeout=0.5) + time.sleep(2.0) # wait for Arduino reset + print(f' Arduino connected: {port}') + except Exception as e: + print(f' Arduino NOT connected: {e}') + print(' Servo control DISABLED — keyboard still works for capture') + + def send(self, cmd): + if self.ser and self.ser.is_open: + try: + self.ser.write((cmd + '\n').encode()) + time.sleep(0.03) + except: + pass + + def pan(self, angle): + angle = max(PAN_MIN, min(PAN_MAX, angle)) + self.send(f'PAN:{angle}') + return angle + + def tilt(self, angle): + angle = max(TILT_MIN, min(TILT_MAX, angle)) + # Match ROS action server — invert before sending + hw = (180 - angle) if TILT_INVERT else angle + self.send(f'TILT:{hw}') + return angle + + def home(self): + self.pan(90) + self.tilt(90) + + def close(self): + if self.ser: + self.ser.close() + +# ───────────────────────────────────────────────────────────────────────────── +# Helpers +# ───────────────────────────────────────────────────────────────────────────── +def count_imgs(cls): + d_i = OUT_DIR / cls / 'insta360' + d_l = OUT_DIR / cls / 'logitech' + n_i = len(list(d_i.glob('*.jpg'))) if d_i.exists() else 0 + n_l = len(list(d_l.glob('*.jpg'))) if d_l.exists() else 0 + return n_i, n_l + +def next_idx(folder): + existing = list(folder.glob('img_*.jpg')) + if not existing: + return 1 + nums = [int(f.stem.split('_')[1]) for f in existing if f.stem.split('_')[1].isdigit()] + return max(nums) + 1 if nums else 1 + +def save_both(frame_insta, frame_logi, cls, n_captured): + ts = datetime.now().strftime('%H%M%S') + dir_i = OUT_DIR / cls / 'insta360' + dir_l = OUT_DIR / cls / 'logitech' + dir_i.mkdir(parents=True, exist_ok=True) + dir_l.mkdir(parents=True, exist_ok=True) + + idx_i = next_idx(dir_i) + idx_l = next_idx(dir_l) + + fname_i = dir_i / f'img_{idx_i:04d}.jpg' + fname_l = dir_l / f'img_{idx_l:04d}.jpg' + + saved = [] + if frame_insta is not None: + cv2.imwrite(str(fname_i), frame_insta) + saved.append(f'insta360/{fname_i.name}') + if frame_logi is not None: + cv2.imwrite(str(fname_l), frame_logi) + saved.append(f'logitech/{fname_l.name}') + + return saved + +def draw_hud(canvas, cls, pan, tilt, step, n_i, n_l, flash=False): + h, w = canvas.shape[:2] + cv2.rectangle(canvas, (0, 0), (w, 44), DARK, -1) + + status_color = GREEN if not flash else (0, 200, 255) + cv2.putText(canvas, f'CLASS: {cls}', (8, 28), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, YELLOW, 2) + cv2.putText(canvas, f'PAN:{pan:3d} TILT:{tilt:3d} step:{step}', + (310, 28), cv2.FONT_HERSHEY_SIMPLEX, 0.55, WHITE, 1) + cv2.putText(canvas, f'insta:{n_i} logi:{n_l}', + (680, 28), cv2.FONT_HERSHEY_SIMPLEX, 0.55, GREEN, 1) + + cv2.rectangle(canvas, (0, h-30), (w, h), DARK, -1) + help_txt = '[SPACE]=capture [arrows]=pan/tilt [c]=class [h]=home [+/-]=step [q]=quit' + cv2.putText(canvas, help_txt, (6, h-10), + cv2.FONT_HERSHEY_SIMPLEX, 0.42, (180, 180, 180), 1) + +def select_class(): + os.system('clear') + print('') + print(' ╔══════════════════════════════════════╗') + print(' ║ YOLO DATASET COLLECTOR ║') + print(' ╚══════════════════════════════════════╝') + print('') + for i, c in enumerate(CLASSES, 1): + n_i, n_l = count_imgs(c) + print(f' {i}: {c:<22} insta={n_i} logi={n_l}') + print(' q: Quit') + print('') + ch = input(' Select class: ').strip().lower() + if ch == 'q': + return None + if ch.isdigit() and 1 <= int(ch) <= len(CLASSES): + return CLASSES[int(ch) - 1] + return CLASSES[0] + +# ───────────────────────────────────────────────────────────────────────────── +# Main +# ───────────────────────────────────────────────────────────────────────────── +def main(): + # ── Find cameras ───────────────────────────────────────────────────────── + print('\nDetecting cameras...') + insta_idx = find_camera('Insta360') + logi_idx = find_camera('Logitech') + + # Fallback: sysfs name for Logitech C920 is "HD Pro Webcam C920" (no "logitech" word) + # Try /dev/video0 and /dev/video1 directly, skip whichever insta360 already claimed + if logi_idx < 0: + for try_idx in [0, 1, 2, 3]: + if try_idx == insta_idx: + continue + dev = f'/dev/video{try_idx}' + if not os.path.exists(dev): + continue + cap_test = cv2.VideoCapture(dev) + if cap_test.isOpened(): + ret, frm = cap_test.read() + cap_test.release() + if ret and frm is not None and frm.size > 0: + logi_idx = try_idx + break + + cap_insta, cap_logi = None, None + + if insta_idx >= 0: + cap_insta = cv2.VideoCapture(f'/dev/video{insta_idx}') + cap_insta.set(cv2.CAP_PROP_FRAME_WIDTH, 640) + cap_insta.set(cv2.CAP_PROP_FRAME_HEIGHT, 360) + cap_insta.set(cv2.CAP_PROP_BUFFERSIZE, 1) + print(f' Insta360 -> /dev/video{insta_idx}') + else: + print(' Insta360 -> NOT FOUND (will capture black frame)') + + if logi_idx >= 0: + cap_logi = cv2.VideoCapture(f'/dev/video{logi_idx}') + cap_logi.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M','J','P','G')) + cap_logi.set(cv2.CAP_PROP_FRAME_WIDTH, 640) + cap_logi.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) + cap_logi.set(cv2.CAP_PROP_BUFFERSIZE, 1) + print(f' Logitech -> /dev/video{logi_idx}') + else: + print(' Logitech -> NOT FOUND (will capture black frame)') + + # ── Arduino ─────────────────────────────────────────────────────────────── + ard = Arduino() + pan, tilt = 90, 90 + ard.home() + step = 5 + + # ── Class selection ─────────────────────────────────────────────────────── + cls = select_class() + if cls is None: + return + + # ── Window ──────────────────────────────────────────────────────────────── + WIN = 'YOLO Dataset Collector — press q to quit' + cv2.namedWindow(WIN, cv2.WINDOW_NORMAL) + cv2.resizeWindow(WIN, 1280, 420) + + flash_timer = 0 + n_captured = 0 + print(f'\n Collecting: {cls}') + print(' Window open — use keyboard controls') + + while True: + # Read frames + fi, fl = None, None + if cap_insta and cap_insta.isOpened(): + ret, fi = cap_insta.read() + if not ret: fi = None + if cap_logi and cap_logi.isOpened(): + ret, fl = cap_logi.read() + if not ret: fl = None + + # Build side-by-side canvas + left = fi.copy() if fi is not None else __import__('numpy').zeros((360,640,3), dtype='uint8') + right = fl.copy() if fl is not None else __import__('numpy').zeros((360,640,3), dtype='uint8') + left = cv2.resize(left, (640, 360)) + right = cv2.resize(right, (640, 360)) + + cv2.putText(left, 'INSTA360', (8, 350), cv2.FONT_HERSHEY_SIMPLEX, 0.6, RED, 2) + cv2.putText(right, 'LOGITECH', (8, 350), cv2.FONT_HERSHEY_SIMPLEX, 0.6, CYAN, 2) + + canvas = __import__('numpy').hstack([left, right]) + canvas = __import__('numpy').vstack([ + __import__('numpy').zeros((50, 1280, 3), dtype='uint8'), + canvas, + __import__('numpy').zeros((34, 1280, 3), dtype='uint8'), + ]) + + n_i, n_l = count_imgs(cls) + draw_hud(canvas, cls, pan, tilt, step, n_i, n_l, flash=flash_timer > 0) + if flash_timer > 0: flash_timer -= 1 + + # Capture flash indicator + if flash_timer > 3: + h, w = canvas.shape[:2] + cv2.rectangle(canvas, (0,0),(w,h), (0,200,200), 6) + cv2.putText(canvas, 'CAPTURED!', (500, 220), + cv2.FONT_HERSHEY_SIMPLEX, 2.0, (0,255,100), 4) + + cv2.imshow(WIN, canvas) + + key = cv2.waitKey(30) & 0xFF + + # ── Quit ───────────────────────────────────────────────────────────── + if key == ord('q'): + break + + # ── Change class ───────────────────────────────────────────────────── + elif key == ord('c'): + cv2.destroyAllWindows() + cls = select_class() + if cls is None: + break + cv2.namedWindow(WIN, cv2.WINDOW_NORMAL) + cv2.resizeWindow(WIN, 1280, 420) + print(f'\n Collecting: {cls}') + + # ── Home ───────────────────────────────────────────────────────────── + elif key == ord('h'): + pan, tilt = 90, 90 + ard.home() + + # ── Step size ───────────────────────────────────────────────────────── + elif key in (ord('+'), ord('=')): + step = min(step + 1, 15) + elif key == ord('-'): + step = max(step - 1, 1) + + # ── Pan ─────────────────────────────────────────────────────────────── + elif key == 81 or key == 2424832: # Left arrow + pan = ard.pan(pan - step) + elif key == 83 or key == 2555904: # Right arrow + pan = ard.pan(pan + step) + + # ── Tilt ────────────────────────────────────────────────────────────── + elif key == 82 or key == 2490368: # Up arrow + tilt = ard.tilt(tilt - step) # less = servo looks up + elif key == 84 or key == 2621440: # Down arrow + tilt = ard.tilt(tilt + step) + + # ── CAPTURE ────────────────────────────────────────────────────────── + elif key == 32: # SPACE + # Flush buffers — read a few frames first to get the latest + for _ in range(3): + if cap_insta: cap_insta.read() + if cap_logi: cap_logi.read() + ret_i, fi_cap = (cap_insta.read() if cap_insta else (False, None)) + ret_l, fl_cap = (cap_logi.read() if cap_logi else (False, None)) + if not ret_i: fi_cap = None + if not ret_l: fl_cap = None + + saved = save_both(fi_cap, fl_cap, cls, n_captured) + n_captured += 1 + flash_timer = 10 + print(f' [{n_captured}] Saved: {", ".join(saved)} (class={cls})') + + # ── Cleanup ─────────────────────────────────────────────────────────────── + if cap_insta: cap_insta.release() + if cap_logi: cap_logi.release() + cv2.destroyAllWindows() + ard.home() + time.sleep(0.5) + ard.close() + + # ── Final summary ───────────────────────────────────────────────────────── + print('\n══════════════════════════════════════════') + print(' COLLECTION COMPLETE') + print('══════════════════════════════════════════') + for c in CLASSES: + n_i, n_l = count_imgs(c) + print(f' {c:<24} insta={n_i:3d} logi={n_l:3d}') + print(f'\n Output: {OUT_DIR}') + print('\n SCP to laptop:') + print(f' rsync -avz rgen@192.168.8.181:{OUT_DIR}/ \\') + print(f' /home/dinethra/Jetson_orin_nano/data/yolo_dataset/') + print('══════════════════════════════════════════\n') + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/data_collect.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/data_collect.md new file mode 100644 index 00000000..444a6584 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/data_collect.md @@ -0,0 +1,454 @@ +# Visual Inspection System — Data Collection & Evaluation Master Guide + +**System:** Go2 quadruped robot + Jetson Orin Nano + Insta360 (pole-mounted) + Logitech C920 (pan-tilt) +**Pipeline:** Insta360 → YOLOv11n TensorRT → ByteTrack → Coarse (degree-4 polynomial) → IBVS PID → Logitech ROI → Server (Gauge 10-step or VLM/Gemini) +**See also:** `evaluation_plan.md` — full metric formulas and evaluation scripts + +--- + +## HOW DATA IS STORED — READ THIS FIRST + +Every capture stores data in **two places**. You need both. Do not confuse them. + +### 1. `capture_log.csv` — Runtime metadata per capture (Jetson) +**Location on Jetson:** `~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv` +**Location on laptop:** `/home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws/capture_log.csv` (after scp) + +This is written **automatically** by `collect_dataset.py` each time you press ENTER to capture. + +**Current columns and what they mean:** + +| Column | Example | Status | Used for | +|---|---|---|---| +| `timestamp` | `2026-04-18_19:20:24` | ✅ Working | Traceability | +| `folder` | `reference` | ✅ Working | Knowing which session | +| `filename` | `fire_ref_28.jpg` | ✅ Working | Linking image to metadata | +| `object_type` | `fire_extinguisher` | ✅ Working | All metrics | +| `distance_m` | `1` | ✅ Working | Distance robustness | +| `angle_deg` | `0` | ✅ Working | Angle robustness | +| `angle_direction` | `center` | ✅ Working | Angle robustness | +| `occlusion_pct` | `0` | ✅ Working | Occlusion robustness | +| `n_objects` | `1` | ✅ Working | Multi-object eval | +| `ibvs_time_s` | `0.92` | ✅ Real seconds | IBVS convergence time | +| `ibvs_fps` | `0.0` | ⚠️ Bug (ignore) | IBVS FPS | +| `final_error_px` | `9.85` | ✅ Real px | Final centering error | +| `converged` | `True` | ✅ Working | Convergence rate % | +| `coarse_time_s` | `0.0` | ⚠️ Not set | Coarse stage time | +| `pipeline_time_s` | `0.0` | ⚠️ Bug (ignore) | Total pipeline time | +| `detection_confidence` | `0.8975` | ✅ Working | YOLO confidence | +| `objects_inspected` | `1` | ✅ Working | Multi-object handling | +| `ground_truth_value` | `N/A` or `2.5` | ✅ You enter | Gauge MAE/RMSE, VLM PASS/FAIL | +| `notes` | caption text | ✅ You enter | BERTScore, LLM-judge | + +> **During collection, verify after each capture:** open a second terminal and run +> `tail -3 ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv` +> You should see the new row with real `ibvs_time_s` (not 0.0) and `converged=True/False`. + +### 2. Captured images — Logitech ROI images (Jetson) +**Location:** `~/Documents/Visual_Inspection_ws/captures/inspection///instance_N/` + +Each capture saves **4 Logitech images** + 1 `metadata.json`: +``` +captures/inspection/20260418_192024/fire_extinguisher/instance_1/ + img_01_conf0.90.jpg + img_02_conf0.90.jpg + img_03_conf0.90.jpg + img_04_conf0.90.jpg + metadata.json ← ibvs_time_s, ibvs_error_px, ibvs_converged, confidence all here +``` + +**These images are the input to all offline evaluation scripts.** SSIM, PSNR, VIF, AlexNet, gauge reading, VLM decisions — all computed by running scripts on these images AFTER collection. + +> **During collection, verify:** `ls ~/Documents/Visual_Inspection_ws/captures/inspection/ | tail -5` +> You should see new timestamped folders appearing after each capture. + +--- + +## WHAT GETS COMPUTED WHEN + +| Metric | Stored during capture? | Computed when? | Script | +|---|---|---|---| +| IBVS convergence time | ✅ CSV `ibvs_time_s` | During collection | automatic | +| Final pixel error | ✅ CSV `final_error_px` | During collection | automatic | +| Convergence rate % | ✅ CSV `converged` | After — `converged.mean()` | pandas on CSV | +| YOLO confidence | ✅ CSV `detection_confidence` | During collection | automatic | +| **SSIM** | ❌ Not in CSV | **Offline** on images | `evaluate_image_quality.py` | +| **PSNR** | ❌ Not in CSV | **Offline** on images | `evaluate_image_quality.py` | +| **VIF** | ❌ Not in CSV | **Offline** on images | `evaluate_image_quality.py` | +| **AlexNet cosine similarity** | ❌ Not in CSV | **Offline** on images | `evaluate_image_quality.py` | +| **Gauge predicted reading** | ❌ Not in CSV | **Offline** — send image to server | `evaluate_gauge.py` | +| **Gauge MAE / RMSE** | ❌ Not in CSV | **Offline** from predicted vs GT | `evaluate_gauge.py` | +| **VLM PASS/FAIL decision** | ❌ Not in CSV | **Offline** — send image to server | `evaluate_vlm.py` | +| **VLM precision/recall/F1** | ❌ Not in CSV | **Offline** from decisions | `evaluate_vlm.py` | +| **BERTScore** | ❌ Not in CSV | **Offline** from captions | `evaluate_vlm.py` | +| **LLM-as-judge** | ❌ Not in CSV | **Offline** | `evaluate_vlm_llm_judge.py` | + +**Bottom line:** If images are saved and CSV has real `ibvs_time_s` values, you have everything needed. The CSV alone is not the final result — it is one of two inputs. + +--- + +## COLLECTION PRIORITY ORDER + +``` +PRIORITY 1 — Reference Images (baseline for ALL image quality metrics) +PRIORITY 2 — Angle Evaluation (Professor specifically required this) +PRIORITY 3 — Distance Evaluation (1m, 2m, 3m, 4m) +PRIORITY 4 — Gauge Ground Truth (for MAE/RMSE) +PRIORITY 5 — VLM Labelled Images (PASS/FAIL for each object type) +PRIORITY 6 — Occlusion Evaluation +PRIORITY 7 — Multi-object scenes +``` + +--- + +## PRIORITY 1 — REFERENCE IMAGES + +### What we evaluate with this dataset +These are the **gold-standard baseline images** that every other image is compared against. +Used to compute: **SSIM, PSNR, VIF, AlexNet cosine similarity** for all other sessions. +Without reference images, you cannot run any image quality metric. + +### Parameters that must be stored + +**In CSV (automatic):** +- `ibvs_time_s` — should be < 3s (best possible conditions) +- `final_error_px` — should be < 10px +- `converged` — must be True (discard if False) +- `detection_confidence` — should be > 0.7 for extinguisher, > 0.3 for gauge + +**As image files (automatic):** +- 4 Logitech ROI images per capture in `captures/inspection/...` +- These are the reference images you will use in `evaluate_image_quality.py` + +**You enter manually:** +- `distance_m` = `1.0` (exact measurement) +- `angle_deg` = `0` +- `notes` = any issues (lighting, blur, etc.) + +### How to verify during collection +```bash +# Check CSV has real values (not all zeros): +tail -3 ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv +# → ibvs_time_s should be 0.5–5.0, NOT 0.0 +# → converged should be True + +# Check images exist: +ls ~/Documents/Visual_Inspection_ws/captures/inspection/ | tail -3 +``` + +### Setup +- Distance: **exactly 1.0 m** (tape measure) +- Angle: **0° head-on** — robot centred in front of object +- Lighting: even, no shadows, no glare on object +- Object: perfectly positioned, fully visible + +### Minimum: 5 images per object type (gauge, fire_ext, door) + +--- + +## PRIORITY 2 — ANGLE EVALUATION + +### What we evaluate with this dataset +**IBVS Centering Performance vs angle** — does convergence time increase at larger angles? +**Image quality vs angle** — does SSIM/PSNR drop at large angles? +**Maximum usable angle** — at what angle does the system fail? + +Fills in **Table 1** of the paper: Conv. Time, Final Error, Success % per angle. + +### Parameters that must be stored + +**In CSV (automatic):** +- `ibvs_time_s` — key metric: expect < 5s at 0°, longer at 45° +- `final_error_px` — key metric: expect < 10px when converged +- `converged` — key: success rate drops at large angles +- `detection_confidence` — does confidence drop at steep angles? +- `angle_deg` + `angle_direction` — **critical: enter correctly** +- `distance_m` — must be `2.0` for all angle tests + +**As image files (automatic):** +- Logitech ROI images → used for SSIM/PSNR/VIF vs reference + +**You enter manually:** +- `angle_deg` — the angle you set up (15, 30, 45) +- `angle_direction` — L or R + +### How to verify during collection +```bash +# After each angle position, check: +grep "angle_eval" ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv | tail -5 +# → You should see different angle_deg values: 15, 30, 45 +# → ibvs_time_s should INCREASE as angle increases +# → converged should go False at some point (that's the finding!) +``` + +### Angles to cover +| Angle | Direction | Images | Expected result | +|---|---|---|---| +| 0° | head-on | 10 | converge < 3s, err < 10px | +| 15° | L + R | 5 each | converge < 5s | +| 30° | L + R | 5 each | converge 5-8s | +| 45° | L + R | 5 each | may fail to converge | +| 60° | L + R | 3 each | likely fails | + +Distance fixed at **2.0 m** for all. + +--- + +## PRIORITY 3 — DISTANCE EVALUATION + +### What we evaluate with this dataset +**How performance degrades with distance.** +CSV gives: convergence time vs distance, final error vs distance, success rate vs distance. +Images give: SSIM/PSNR/VIF vs distance — showing image quality degradation. +At 4m: test if gauge OCR can still read numbers (visual check of images). + +Fills in **Table 1** of the paper: SSIM column and Conv. Time column per distance. + +### Parameters that must be stored + +**In CSV (automatic):** +- `ibvs_time_s` — expect 2s at 1m, up to 8s at 4m +- `final_error_px` — larger at farther distances +- `converged` — may fail at 4m +- `detection_confidence` — drops with distance +- `distance_m` — **critical: enter exact measured value** + +**As image files (automatic):** +- Logitech ROI images → compared to reference for SSIM/PSNR vs distance + +**You enter:** +- `distance_m` — tape-measured, not estimated + +### How to verify during collection +```bash +grep "distance_eval" ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv +# → distance_m column should have: 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0 +# → ibvs_time_s should increase as distance increases +``` + +### Distances: 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0 m +- Object: **gauge** (best to show OCR degradation) +- Angle: always **0°** +- 10 images per distance + +--- + +## PRIORITY 4 — GAUGE GROUND TRUTH + +### What we evaluate with this dataset +**Gauge reading pipeline accuracy (MAE, RMSE, MAPE, Success Rate).** +The pipeline (10-step CV+DL on server) predicts a reading. You compare to true value. +`MAE = mean(|predicted - true|)` — needs `ground_truth_value` in CSV for every image. + +### Parameters that must be stored + +**In CSV (automatic):** +- `ibvs_time_s`, `final_error_px`, `converged` — IBVS quality +- `detection_confidence` — gauge confidence + +**In CSV — YOU MUST ENTER:** +- `ground_truth_value` — the **actual gauge reading** (e.g. `2.5`) + **This is THE most critical field for this session. Without it, no MAE/RMSE is possible.** +- `notes` — describe pointer position precisely ("between 2 and 3 markings, closer to 2.5") + +**As image files (automatic):** +- Logitech ROI images → sent to server by `evaluate_gauge.py` → get predicted readings + +**Computed offline (NOT in CSV):** +- Predicted gauge reading from server +- MAE, RMSE, MAPE, bias — all from `evaluate_gauge.py` + +### How to verify during collection +```bash +grep "gauge_accuracy" ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv +# → ground_truth_value column MUST have actual numbers (2.5, 4.0, 7.0...) +# → NOT "N/A" — if you see N/A for gauge_accuracy rows, that run is useless +``` + +### Readings to collect +Cover the full gauge range with at least 6 different readings, 3 images each minimum: +`0, 25%, 50%, 75%, 100%` of scale + a few intermediate points. + +--- + +## PRIORITY 5 — VLM EVALUATION IMAGES + +### What we evaluate with this dataset +**VLM (Gemini) PASS/FAIL decision accuracy.** +Metrics: Accuracy, Precision, Recall, F1 per object type. +Also: BERTScore (text quality) and LLM-as-judge (needs your written caption). + +### Parameters that must be stored + +**In CSV (automatic):** +- `ibvs_time_s`, `converged`, `detection_confidence` — capture quality + +**In CSV — YOU MUST ENTER:** +- `ground_truth_value` — `PASS` or `FAIL` (not a number — write the word) + **Without this, you cannot compute accuracy/recall/F1.** +- `notes` — a **one-sentence description of exactly what the camera sees** + Example: `"Red fire extinguisher on wall, large brown box placed directly in front blocking access"` + **This is the ground truth caption for BERTScore and LLM-as-judge.** + +**As image files (automatic):** +- Logitech ROI images → sent to server by `evaluate_vlm.py` → get predicted decisions + +**Computed offline (NOT in CSV):** +- VLM predicted decision (PASS/FAIL/UNKNOWN) +- VLM confidence, summary, findings +- Accuracy, Precision, Recall, F1 — from `evaluate_vlm.py` +- BERTScore — compares VLM summary to your `notes` caption +- LLM-as-judge score — from `evaluate_vlm_llm_judge.py` + +### How to verify during collection +```bash +grep "vlm_eval" ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv +# → ground_truth_value column should say "PASS" or "FAIL" (not N/A) +# → notes column should have a human-readable description (not empty) +``` + +### Scenarios needed (per object type) +| Object | PASS setup | FAIL setup | Min images each | +|---|---|---|---| +| fire_extinguisher | on wall, nothing blocking | box/chair directly in front | 10 | +| emergency_exit | clear walkway | chairs stacked blocking door | 10 | +| door | normal open or closed | objects leaning against it | 10 | +| main_cylinder | dry floor | small water puddle near base | 10 | + +--- + +## PRIORITY 6 — OCCLUSION EVALUATION + +### What we evaluate with this dataset +**Maximum tolerable occlusion before detection/IBVS fails.** +Produces: Detection rate vs occlusion curve, IBVS success rate vs occlusion. +Also: SSIM/AlexNet drop vs occlusion (from images vs reference). + +### Parameters that must be stored + +**In CSV (automatic):** +- `converged` — key: at what % does success rate drop below 80%? +- `detection_confidence` — key: how does YOLO confidence drop? +- `final_error_px` — does error increase with occlusion? +- `occlusion_pct` — **critical: enter the exact level you applied** + +**As image files (automatic):** +- Logitech ROI images → compared to reference for SSIM drop + +**You enter:** +- `occlusion_pct` — must match what you physically applied + +### How to verify during collection +```bash +grep "occlusion" ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv +# → occlusion_pct column should have: 0, 25, 50, 75 (not all zeros) +# → detection_confidence should DROP as occlusion_pct increases +# → converged should go False at some point (that's the finding!) +``` + +### Levels: 0%, 25%, 50%, 75%, 90% +- Object: fire_extinguisher (easiest to partially cover) +- Distance: 2.0m, angle: 0° +- Cover bottom-up with cardboard +- 10 images per level + +--- + +## PRIORITY 7 — MULTI-OBJECT SCENES + +### What we evaluate with this dataset +**ByteTrack multi-object tracking correctness.** +Confirms system handles 2+ objects: correct class labels, correct inspection order, correct count. + +### Parameters that must be stored + +**In CSV (automatic):** +- `objects_inspected` — should equal number of objects in scene +- `n_objects` — **enter how many objects you placed** +- `converged` — did IBVS succeed for each object? + +**You enter:** +- `n_objects` — how many objects actually in scene (2 or 3) +- `notes` — describe the scene ("2 fire extinguishers side by side, 0.5m apart") + +### How to verify during collection +```bash +grep "multi_object" ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv +# → n_objects should be 2 or 3 +# → objects_inspected should match n_objects (if both detected and converged) +``` + +--- + +## QUICK VERIFICATION CHECKLIST — Run after every session + +```bash +# 1. Is the CSV growing? +wc -l ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv + +# 2. Are ibvs values real (not all 0.0)? +awk -F',' '{print $10,$12,$13}' ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv | tail -10 +# → ibvs_time_s (col 10) should NOT be all 0.0 +# → final_error_px (col 12) should NOT be all 0.0 +# → converged (col 13) should be True or False + +# 3. Are images being saved? +ls ~/Documents/Visual_Inspection_ws/captures/inspection/ | wc -l +# → Should increase after each capture session + +# 4. For gauge session — ground truth filled in? +grep "gauge_accuracy" ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv | awk -F',' '{print $18}' +# → Should show actual numbers (2.5, 4.0...) NOT "N/A" + +# 5. For VLM session — PASS/FAIL and caption filled in? +grep "vlm_eval" ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv | awk -F',' '{print $18,$19}' +# → Should show "PASS" or "FAIL" and a text caption +``` + +--- + +## AFTER COLLECTION — Export everything + +```bash +# From Jetson, copy to laptop: +scp -r rgen@192.168.8.181:~/Documents/Visual_Inspection_ws/evaluation/ \ + /home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws/eval_dataset/ + +scp -r rgen@192.168.8.181:~/Documents/Visual_Inspection_ws/captures/ \ + /home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws/captures/ +``` + +Then run offline evaluation scripts on laptop/server: +- `evaluate_image_quality.py` → SSIM, PSNR, VIF, AlexNet (needs reference + test images) +- `evaluate_gauge.py` → MAE, RMSE (needs images + ground_truth_value from CSV) +- `evaluate_vlm.py` → Accuracy, Recall, F1 (needs images + PASS/FAIL from CSV) +- `evaluate_vlm_llm_judge.py` → LLM judge (needs images + notes captions from CSV) + +--- + +## WHAT THE CURRENT LOG ROW GIVES YOU + +``` +2026-04-18_19:20:24, reference, fire_ref_28.jpg, fire_extinguisher, +1, 0, center, 0, 1, +ibvs_time=1.12, ibvs_fps=0.0, final_error=9.85, converged=True, +coarse=0.0, pipeline=0.0, conf=0.8975, inspected=1, N/A, +``` + +| Field | Value | Usable? | +|---|---|---| +| ibvs_time_s | 1.12 s | ✅ Real — use for Table 1 | +| final_error_px | 9.85 px | ✅ Real — use for Table 1 | +| converged | True | ✅ Real — use for success rate | +| detection_confidence | 0.8975 | ✅ Real — use for Table 2 | +| ibvs_fps | 0.0 | ⚠️ Bug — skip this column | +| coarse_time_s | 0.0 | ⚠️ Not instrumented — skip | +| pipeline_time_s | 0.0 | ⚠️ Bug — skip this column | +| SSIM / PSNR / VIF | not in CSV | → compute from saved images offline | + +**The reference session images that are saving alongside this log ARE sufficient. +Continue collecting — you are not losing data.** + +--- + +*Last updated: 2026-04-22 | Robot: Go2 + Jetson Orin Nano* \ No newline at end of file diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/evaluation_plan.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/evaluation_plan.md new file mode 100644 index 00000000..de818917 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/evaluation_plan.md @@ -0,0 +1,721 @@ +# Visual Inspection System — Full Evaluation Plan + +**Robot:** Go2 quadruped with Jetson Orin Nano, Insta360 (tall pole mount), Logitech C920 on 3D-printed pan-tilt +**System:** Insta360 → YOLO → Coarse (polynomial) → IBVS → Capture → Server (Gauge / VLM) + +--- + +## Why This Evaluation Plan + +The system has **three distinct pipelines** to evaluate: +1. **ROI Capture Pipeline** (Jetson) — does it reliably point the camera and get a usable image? +2. **Gauge Reading Pipeline** (Server) — is the analog reading numerically accurate? +3. **VLM Decision Pipeline** (Server) — are the inspection decisions (PASS/FAIL) correct? + +Each pipeline has different failure modes and needs different metrics. The metrics your friend suggested are **all valid** — we just need to map each one to the right pipeline. Some (SSIM, PSNR, VIF, AlexNet cosine) measure **image quality**. Others (BERTScore, LLM-as-judge) measure **language output correctness**. Several (occlusion, angle, distance) are **robustness test conditions** rather than metrics themselves. + +--- + +## PART 1: ROI Capture Pipeline Evaluation + +**Goal:** Prove that the Insta360 → YOLO → Coarse → IBVS → Logitech capture chain reliably delivers a high-quality, well-centered image of the target object to the downstream pipelines. + +--- + +### 1.1 IBVS Centering Performance + +**What it is:** How accurately and how quickly does the IBVS PID loop center the target object in the Logitech frame? + +**Metrics:** + +| Metric | Description | How to compute | Target | +|--------|-------------|----------------|--------| +| Convergence Time (s) | Time from coarse position → final error < 10px | Log `time.time()` at coarse done and IBVS done | < 5s | +| Final Pixel Error (px) | Distance of object center from frame center at convergence | `sqrt(ex² + ey²)` at the moment convergence is declared | < 10px | +| Convergence Rate (%) | Percentage of runs that successfully converge within 40s | `n_success / n_total * 100` | > 90% | +| IBVS FPS | Detection frames per second during IBVS loop | Count frames per second in the IBVS while loop | ~15-20 FPS | + +**Test conditions:** + +| Variable | Levels | +|----------|--------| +| Distance from target | 1 m, 2 m, 3 m | +| Object class | gauge, fire_extinguisher, door | +| Approach angle | 0° (head-on), 15°, 30°, 45° | +| Occlusion level | 0% (clear), 25%, 50% occluded | +| Scene | 1 object, 2 objects, 3 objects | + +**How to collect data (add to `ibvs_action_server.py`):** + +```python +import csv +from pathlib import Path + +eval_log = Path('~/eval_results/capture_eval.csv').expanduser() +eval_log.parent.mkdir(exist_ok=True) + +# After IBVS completes: +with open(eval_log, 'a', newline='') as f: + writer = csv.writer(f) + writer.writerow([ + datetime.now().isoformat(), + cls_name, # object class + distance_m, # set manually before each run (1/2/3) + approach_angle, # set manually (0/15/30/45) + occlusion_pct, # set manually (0/25/50) + n_objects, # how many objects in scene + ibvs_time_s, # seconds to converge + final_pixel_error, # px at convergence + converged, # True or False + coarse_time_s, # time for coarse stage + ]) +``` + +--- + +### 1.2 Image Quality Metrics + +**Goal:** Prove that the captured Logitech image is sharp enough, bright enough, and similar enough to a reference to be useful as input to the gauge/VLM pipelines. + +First, capture a **reference image** for each object: place the object at 1m, run IBVS, capture after full autofocus — this is your "gold standard" image. All subsequent captured images are compared to this reference. + +--- + +#### 1.2.1 SSIM — Structural Similarity Index + +**What it is:** Compares two images across three channels: luminance, contrast, and structural information. SSIM = 1 means identical images. It correlates well with human perception of image quality. + +**Why relevant here:** If the IBVS centering is slightly off, or the autofocus didn't work, or there is motion blur, SSIM will drop. Tells us if the captured ROI is visually close to the ideal reference. + +**Range:** -1 to 1 (higher is better). Typically > 0.7 is considered acceptable. + +**How to compute:** +```python +from skimage.metrics import structural_similarity as ssim +import cv2 + +ref = cv2.imread('reference_gauge.jpg', cv2.IMREAD_GRAYSCALE) +cap = cv2.imread('captured_gauge.jpg', cv2.IMREAD_GRAYSCALE) +# Resize to same dimensions +cap_r = cv2.resize(cap, (ref.shape[1], ref.shape[0])) +score, _ = ssim(ref, cap_r, full=True) +print(f"SSIM: {score:.4f}") +``` + +**Expected target:** SSIM > 0.70 for all successful captures. + +--- + +#### 1.2.2 PSNR — Peak Signal-to-Noise Ratio + +**What it is:** Measures image quality in decibels (dB). Comes from signal processing — compares the max possible pixel value (the "peak signal") to the noise (pixel-level differences between captured and reference). Higher dB = less noise/distortion. + +**Why relevant here:** Detects blurriness, noise, or exposure issues in the captured image. Directly affects whether OCR can read gauge numbers. + +**Scale:** Typically: +- > 40 dB = excellent quality +- 30–40 dB = good +- 20–30 dB = acceptable +- < 20 dB = poor + +**How to compute:** +```python +import cv2 + +ref = cv2.imread('reference_gauge.jpg') +cap = cv2.imread('captured_gauge.jpg') +cap_r = cv2.resize(cap, (ref.shape[1], ref.shape[0])) +psnr_val = cv2.PSNR(ref, cap_r) +print(f"PSNR: {psnr_val:.2f} dB") +``` + +**Expected target:** PSNR > 30 dB for gauge images to be readable. + +--- + +#### 1.2.3 YCbCr Y-Channel (Luminance) Comparison + +**What it is:** Convert both images from BGR to YCbCr colour space. **Y = luminance (brightness/illuminance), Cb = blue-difference chrominance, Cr = red-difference chrominance.** By extracting only the Y channel, we isolate brightness differences from colour differences — this is important because our inspection objects are judged in terms of their features, not their colour. + +**Why relevant here:** The Logitech camera may auto-expose differently at different distances or angles. Two structurally identical captures with different brightness will have low SSIM/PSNR even though the content is the same. Y-channel comparison strips out colour and focuses on lighting quality and sharpness. + +**How to compute:** +```python +import cv2 +import numpy as np +from skimage.metrics import structural_similarity as ssim + +ref = cv2.imread('reference_gauge.jpg') +cap = cv2.imread('captured_gauge.jpg') +cap_r = cv2.resize(cap, (ref.shape[1], ref.shape[0])) + +# Convert to YCbCr +ref_ycbcr = cv2.cvtColor(ref, cv2.COLOR_BGR2YCrCb) +cap_ycbcr = cv2.cvtColor(cap_r, cv2.COLOR_BGR2YCrCb) + +# Extract Y (luminance) channel +ref_y = ref_ycbcr[:, :, 0] +cap_y = cap_ycbcr[:, :, 0] + +# Compare luminance directly +y_ssim, _ = ssim(ref_y, cap_y, full=True) +y_mean_diff = abs(float(ref_y.mean()) - float(cap_y.mean())) + +print(f"Y-channel SSIM: {y_ssim:.4f}") +print(f"Y-channel mean brightness: ref={ref_y.mean():.1f}, cap={cap_y.mean():.1f}, diff={y_mean_diff:.1f}") +``` + +**Reports:** Mean Y value (is image too dark / too bright?), Y-channel SSIM (structural similarity ignoring colour). Ideal Y range: 80–180 (avoids very dark < 80 or overexposed > 200). + +--- + +#### 1.2.4 VIF — Visual Information Fidelity + +**What it is:** A more advanced perceptual image quality metric based on **natural scene statistics** and the **human visual system model (HVS)**. Unlike PSNR (which counts pixel errors) or SSIM (which compares structural features), VIF models how much *visual information* from the reference image is preserved in the test image, accounting for how the human eye perceives that information. Range: 0 to 1+ (higher is better, 1 = reference itself). + +**Why relevant here:** VIF is more sensitive to blur and distortion in fine details (like gauge numbers or scale markings) than SSIM or PSNR. A gauge image that looks "okay" by eye but has blurred numbers will score low on VIF — which correctly predicts OCR failure. + +**How to compute:** +```python +# Install: pip install piq +import piq +import torch +import cv2 + +ref = cv2.imread('reference_gauge.jpg') +cap = cv2.imread('captured_gauge.jpg') +cap_r = cv2.resize(cap, (ref.shape[1], ref.shape[0])) + +# Convert to torch tensors [B, C, H, W], float32, 0-1 +def to_tensor(img): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype('float32') / 255.0 + return torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0) + +ref_t = to_tensor(ref) +cap_t = to_tensor(cap_r) + +vif_score = piq.vif_p(ref_t, cap_t, data_range=1.0) +print(f"VIF: {vif_score.item():.4f}") +``` + +**Expected target:** VIF > 0.4 for images with readable gauge detail. + +--- + +#### 1.2.5 AlexNet Cosine Similarity + +**What it is:** Extract deep CNN features from both images using **AlexNet** (pretrained on ImageNet) and measure **cosine similarity** between the feature vectors. Cosine similarity = dot product of two unit vectors, ranging from -1 (opposite) to 1 (identical direction). + +Unlike pixel metrics (SSIM, PSNR), AlexNet features capture **semantic and perceptual similarity** — two images of the same gauge from slightly different angles will have high cosine similarity even though pixels differ. This validates that the camera is pointing at the **correct object**, not just capturing a similar-brightness image. + +**Why relevant here:** Validates the full system end-to-end — after Insta360 detection, coarse pointing, and IBVS, is the captured image semantically about the same object as the reference? High cosine similarity confirms IBVS converged onto the right target. + +**How to compute:** +```python +import torch +import torchvision.models as models +import torchvision.transforms as transforms +import torch.nn.functional as F +from PIL import Image + +# Load AlexNet (pretrained), remove final classifier layers +alexnet = models.alexnet(pretrained=True) +feature_extractor = torch.nn.Sequential(*list(alexnet.features.children())) +feature_extractor.eval() + +preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), +]) + +def get_features(img_path): + img = Image.open(img_path).convert('RGB') + t = preprocess(img).unsqueeze(0) + with torch.no_grad(): + feat = feature_extractor(t) + return feat.flatten() + +ref_feat = get_features('reference_gauge.jpg') +cap_feat = get_features('captured_gauge.jpg') + +cos_sim = F.cosine_similarity(ref_feat.unsqueeze(0), cap_feat.unsqueeze(0)) +print(f"AlexNet Cosine Similarity: {cos_sim.item():.4f}") +``` + +**Expected target:** Cosine similarity > 0.85 confirms camera is looking at the correct object with good centering. + +--- + +### 1.3 Robustness Evaluation + +#### 1.3.1 Occlusion Robustness + +**What it is:** Systematically test how much occlusion the system can tolerate before detection fails, IBVS fails to converge, or image quality drops below threshold. + +**Setup:** Use tape/cardboard/boxes to cover known fractions of the target object during testing. + +| Occlusion Level | How to simulate | Measure | +|-----------------|----------------|---------| +| 0% (baseline) | Nothing blocking | All metrics as baseline | +| 25% | Cover bottom quarter of object | Does YOLO still detect? IBVS still converge? | +| 50% | Cover half of object | Same questions | +| 75% | Cover three-quarters | At what point does detection fail? | + +**Metrics to record at each occlusion level:** +- YOLO detection confidence score +- IBVS convergence success rate (%) +- Final pixel error at convergence +- SSIM of captured image vs reference +- AlexNet cosine similarity + +**Expected outcome:** Plot occlusion % vs detection confidence. Find the **critical occlusion threshold** where detection drops below 0.3 confidence or convergence rate drops below 80%. + +--- + +#### 1.3.2 Angle Robustness + +**What it is:** Test system performance when the robot approaches the target at different horizontal and vertical angles. The Insta360 + coarse mapping was calibrated roughly — offsets at large angles will be bigger, requiring more IBVS correction. + +| Test | Angles | +|------|--------| +| Horizontal approach angle | 0°, 15°, 30°, 45° left/right | +| Vertical tilt (height difference) | Target at eye level, 30cm above, 30cm below | +| Combined | 30° horizontal + 20cm height offset | + +**Metrics:** +- Coarse position offset (how far off is the first servo move?) +- IBVS convergence time at each angle (longer = more correction needed) +- Final pixel error at convergence +- Success rate per angle + +**Expected outcome:** Convergence time increases with angle. Find the **maximum usable angle** where system still converges reliably. + +--- + +#### 1.3.3 Distance Evaluation + +| Distance | Expected FPS | Expected Conv. Time | Expected Image Quality | +|----------|-------------|---------------------|----------------------| +| 1 m | ~20 FPS | < 3s | High (SSIM > 0.80) | +| 2 m | ~20 FPS | 3-5s | Good (SSIM > 0.70) | +| 3 m | ~18 FPS | 4-7s | Acceptable (SSIM > 0.60) | +| 4 m | ~18 FPS | 5-9s | Poor (likely gauge OCR fails) | + +--- + +## PART 2: Gauge Reading Pipeline Evaluation + +**Goal:** Prove that the 10-step geometric+DL pipeline accurately reads analog gauge values. + +### Pipeline Reminder (10 steps): +1. YOLO detects gauge face bounding box +2. Crop + square pad + resize to 448×448 +3. Heatmap key point model detects notch/tick marks + start/end scale markers +4. Algebraic ellipse fitted to notch ring +5. Zero-point reference angle computed from start/end markers +6. PaddleOCR reads scale numbers (warp + rotation corrected) +7. Decimal point detector checks ROIs around each number, corrects (e.g. "25"→"2.5") +8. Segmentation model finds needle pixels, fits a line +9. OCR numbers + needle projected onto ellipse in polar coordinates (angle per number + needle angle) +10. RANSAC line fit maps (angle→value), needle angle → final reading + +### 2.1 Accuracy Metrics + +| Metric | Formula | Target | +|--------|---------|--------| +| MAE (Mean Absolute Error) | mean(|pred - true|) | < 5% of gauge full range | +| RMSE (Root Mean Squared Error) | sqrt(mean((pred-true)²)) | < 5% of gauge full range | +| MAPE (Mean Absolute % Error) | mean(|pred-true|/true × 100%) | < 5% | +| Success Rate | n_readings / n_total | > 80% | +| Over/Under Read Bias | mean(pred - true) | Near 0 (no systematic bias) | + +### 2.2 Failure Mode Breakdown + +Track which step fails using the error log files the gauge pipeline writes: + +| Failure Mode | How detected | Expected frequency | +|-------------|-------------|-------------------| +| Gauge not detected | YOLO conf < threshold | < 5% | +| Ellipse fit failed ("not an ellipse" error) | Exception in step 4 | < 10% | +| OCR found no numbers | `OCR_NONE_DETECTED_KEY` in errors | < 10% | +| Segmentation failed (no needle) | `SEGMENTATION_FAILED_KEY` | < 5% | +| RANSAC insufficient points | Too few OCR numbers | < 10% | + +### 2.3 Data Collection Script + +```python +# evaluate_gauge.py — run on server with server running +import requests, json, time, csv, statistics +from pathlib import Path + +SERVER = "http://localhost:8001" + +# Build ground truth dict: {image_path: true_reading} +# You create this by setting gauge to known positions and photographing +ground_truth = { + "eval_images/gauge_0.5.jpg": 0.5, + "eval_images/gauge_1.0.jpg": 1.0, + "eval_images/gauge_2.5.jpg": 2.5, + "eval_images/gauge_5.0.jpg": 5.0, + # ... add all your test images +} + +results = [] +for img_path, true_val in ground_truth.items(): + with open(img_path, 'rb') as f: + r = requests.post(f"{SERVER}/api/v1/jobs", + files={"file": f}, + data={"object_type": "gauge"}) + job_id = r.json()["job_id"] + t0 = time.time() + + while True: + job = requests.get(f"{SERVER}/api/v1/jobs/{job_id}").json() + if job["status"] in ["DONE", "FAILED"]: break + time.sleep(2) + + elapsed = round(time.time() - t0, 1) + pred_val, unit, err = None, None, None + + if job["status"] == "DONE": + data = json.loads(job["result_json"]) + pred_val = data.get("reading") + unit = data.get("unit") + if pred_val is not None: + err = abs(pred_val - true_val) + + results.append({"image": img_path, "true": true_val, "predicted": pred_val, + "unit": unit, "error": err, "time_s": elapsed, + "status": job["status"]}) + print(f" {'✓' if err and err < 0.5 else '✗'} {img_path}: " + f"true={true_val}, pred={pred_val}, err={err}") + +# Save CSV +with open("gauge_eval_results.csv", "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=results[0].keys()) + writer.writeheader() + writer.writerows(results) + +# Summary +good = [r for r in results if r["error"] is not None] +errors = [r["error"] for r in good] +print(f"\n=== GAUGE EVALUATION ===") +print(f"Total: {len(results)} | Success: {len(good)} ({len(good)/len(results)*100:.0f}%)") +print(f"MAE: {statistics.mean(errors):.4f}") +print(f"RMSE: {(sum(e**2 for e in errors)/len(errors))**0.5:.4f}") +print(f"Bias: {statistics.mean(r['predicted']-r['true'] for r in good):.4f}") +print(f"Avg time: {statistics.mean(r['time_s'] for r in results):.1f}s") +``` + +--- + +## PART 3: VLM Pipeline Evaluation + +**Goal:** Prove that the Google Gemini 2.5 Flash based VLM correctly makes PASS/FAIL/UNKNOWN inspection decisions across all object types. + +### Routing Tree Reminder: +- `gauge` → gauge pipeline (not VLM) +- `fire_extinguisher` → VLM with specific prompt (present? accessible?) +- `door` → VLM with specific prompt (open/closed state?) +- `emergency_exit` → VLM with specific prompt (exit blocked?) +- `main_cylinder` → VLM with specific prompt (oil leak?) +- `unknown` → VLM with smart auto-detect prompt → identifies object → applies rules → if gauge, recommends gauge pipeline + +### 3.1 Standard Classification Metrics + +| Metric | Formula | Target | +|--------|---------|--------| +| Overall Accuracy | % correct decisions | > 85% | +| Precision (FAIL class) | TP_fail / (TP_fail + FP_fail) | > 80% | +| **Recall (FAIL class)** | TP_fail / (TP_fail + FN_fail) | **> 90%** (safety critical — must not miss failures) | +| F1 Score (FAIL) | 2 × Prec × Rec / (Prec + Rec) | > 0.85 | +| Response Time | seconds from POST to DONE | < 15s | +| Consistency | same image × 3 → same decision? | > 95% | + +> **Why recall matters more than precision for FAIL:** Missing a blocked fire exit (False Negative) is a safety risk. Incorrectly flagging a clear exit as blocked (False Positive) just means manual re-check. So **recall > precision** priority. + +### 3.2 BERTScore — Text Quality Evaluation + +**What it is:** A metric from NLP that measures how well two pieces of text match semantically, using **BERT embeddings**. Instead of counting matching words (like BLEU), BERTScore compares contextual meaning. Gives Precision, Recall, F1 between 0 and 1. + +**Why relevant here:** The VLM returns not just a decision but a natural language `summary` (e.g. "Fire extinguisher is present and the path is clear") and `findings` list. We want to verify these text outputs are **accurate and complete** descriptions of the scene — not just that the binary PASS/FAIL is right. + +**How to use it:** +1. Build a **captioned dataset**: for each test image, write a ground truth description (e.g. "Red fire extinguisher mounted on wall, clearly accessible, no objects blocking it") +2. Compare VLM's generated `summary` against your written caption using BERTScore + +```python +# Install: pip install bert-score +from bert_score import score + +# Ground truth descriptions (you write these for each image) +references = [ + "Fire extinguisher is mounted on wall and fully accessible with no obstructions", + "Emergency exit door is blocked by a large cardboard box placed directly in front", + "No oil leak visible, floor surface around main cylinder is dry and clean", +] + +# VLM output summaries (from result_json["summary"]) +candidates = [ + "Fire extinguisher appears present and accessible with clear access path", + "Exit appears to be obstructed by objects placed in front of the door", + "Area appears dry with no visible oil leaks or puddles near the cylinder", +] + +P, R, F1 = score(candidates, references, lang="en", verbose=False) +for i, (p, r, f) in enumerate(zip(P, R, F1)): + print(f"Image {i+1}: BERTScore P={p:.3f} R={r:.3f} F1={f:.3f}") +``` + +**Expected target:** BERTScore F1 > 0.80 means VLM descriptions semantically match ground truth. + +--- + +### 3.3 LLM as a Judge + +**What it is:** Instead of comparing VLM text to a fixed reference, you use a **stronger LLM (e.g. GPT-4o or Gemini 1.5 Pro)** as an evaluator. You give it: (1) the image, (2) a ground truth human caption describing the scene, (3) the VLM's full output. The judge LLM then rates whether the inspection report is CORRECT / PARTIALLY_CORRECT / INCORRECT and explains why. + +**Why better than BERTScore:** BERTScore only compares text. LLM-as-judge can look at the actual image AND the VLM output and reason about whether the evidence, findings, and decision are all consistent with what is visible. More robust to paraphrasing and catches logical errors. + +**How to implement:** + +```python +import google.generativeai as genai +import json +from PIL import Image + +genai.configure(api_key="YOUR_GEMINI_KEY") +judge_model = genai.GenerativeModel("gemini-1.5-pro") + +def judge_vlm_output(image_path, ground_truth_caption, vlm_output_dict): + img = Image.open(image_path) + + judge_prompt = f"""You are an expert evaluator for an industrial visual inspection AI system. + +GROUND TRUTH (what a human expert says about this image): +"{ground_truth_caption}" + +VLM INSPECTION REPORT (what the AI system produced): +Decision: {vlm_output_dict.get('decision')} +Confidence: {vlm_output_dict.get('confidence')} +Summary: {vlm_output_dict.get('summary')} +Findings: {vlm_output_dict.get('findings')} +Evidence: {vlm_output_dict.get('evidence')} + +Your task: +1. Look at the image carefully +2. Compare the AI report against the ground truth +3. Rate the report as: CORRECT / PARTIALLY_CORRECT / INCORRECT +4. Explain specifically what is right and what is wrong + +Respond in JSON: {{"rating": "CORRECT/PARTIALLY_CORRECT/INCORRECT", "explanation": "...", "decision_correct": true/false, "findings_accurate": true/false}} +""" + + response = judge_model.generate_content([judge_prompt, img]) + return json.loads(response.text) + +# Example usage +result = judge_vlm_output( + "eval_images/fire_ext_blocked.jpg", + "Fire extinguisher is present on wall but blocked by a large blue bin placed in front of it", + {"decision": "FAIL", "confidence": 0.82, + "summary": "Fire extinguisher is present but access is blocked", + "findings": ["Red extinguisher visible", "Blue container blocking access"], + "evidence": {"present": True, "blocked": True}} +) +print(result) +# → {"rating": "CORRECT", "explanation": "Decision and findings match the scene", ...} +``` + +**Report:** % CORRECT, % PARTIALLY_CORRECT, % INCORRECT per object_type. + +--- + +### 3.4 VLM Consistency Test + +**What it is:** Send the same image 3 times and check if the decision, confidence, and key findings are consistent. + +**Why it matters:** Gemini API is non-deterministic. If the system gives PASS one time and FAIL another on the same image, it is unreliable. + +```python +# Send same image 3 times +decisions = [] +for _ in range(3): + # POST image → wait → get result + decision = result_json["decision"] + decisions.append(decision) + time.sleep(2) + +consistency = len(set(decisions)) == 1 # all same? +print(f"Consistency: {decisions} → {'✓ Consistent' if consistency else '✗ Inconsistent'}") +``` + +**Target:** > 95% of images should give the same decision across 3 runs. + +--- + +### 3.5 VLM Data Collection Script + +```python +# evaluate_vlm.py +import requests, json, time, csv +from pathlib import Path +from collections import defaultdict + +SERVER = "http://localhost:8001" + +# Ground truth: {image_path: (object_type, expected_decision, human_caption)} +ground_truth = { + "vlm_eval/ext_clear.jpg": ("fire_extinguisher", "PASS", "Extinguisher accessible"), + "vlm_eval/ext_blocked.jpg": ("fire_extinguisher", "FAIL", "Extinguisher blocked by box"), + "vlm_eval/exit_clear.jpg": ("emergency_exit", "PASS", "Exit path clear"), + "vlm_eval/exit_blocked.jpg": ("emergency_exit", "FAIL", "Exit blocked by equipment"), + "vlm_eval/oil_leak.jpg": ("main_cylinder", "FAIL", "Oil puddle on floor"), + "vlm_eval/no_leak.jpg": ("main_cylinder", "PASS", "Floor dry and clean"), + "vlm_eval/door_open.jpg": ("door", "PASS", "Door open state visible"), + "vlm_eval/door_closed.jpg": ("door", "PASS", "Door closed state visible"), + # Add at least 10 per class +} + +results = [] +for img_path, (obj_type, true_dec, caption) in ground_truth.items(): + with open(img_path, 'rb') as f: + r = requests.post(f"{SERVER}/api/v1/jobs", + files={"file": f}, + data={"object_type": obj_type}) + job_id = r.json()["job_id"] + t0 = time.time() + while True: + job = requests.get(f"{SERVER}/api/v1/jobs/{job_id}").json() + if job["status"] in ["DONE", "FAILED"]: break + time.sleep(1) + + elapsed = round(time.time() - t0, 1) + data = json.loads(job.get("result_json", "{}")) + pred = data.get("decision", "UNKNOWN") + conf = data.get("confidence", 0.0) + correct = (pred == true_dec) + + results.append({"image": img_path, "type": obj_type, "true": true_dec, + "pred": pred, "conf": conf, "correct": correct, + "time_s": elapsed, "caption": caption, + "summary": data.get("summary", "")[:100]}) + print(f" {'✓' if correct else '✗'} {obj_type}: {true_dec} → {pred} ({conf:.2f})") + +# Summary per type +by_type = defaultdict(list) +for r in results: + by_type[r["type"]].append(r) + +print(f"\n=== VLM EVALUATION ===") +for t, rows in by_type.items(): + acc = sum(r["correct"] for r in rows) / len(rows) * 100 + avg_t = sum(r["time_s"] for r in rows) / len(rows) + print(f" {t}: {acc:.0f}% accuracy | avg {avg_t:.1f}s") + +# FAIL recall/precision +tp = sum(1 for r in results if r["true"]=="FAIL" and r["pred"]=="FAIL") +fp = sum(1 for r in results if r["true"]!="FAIL" and r["pred"]=="FAIL") +fn = sum(1 for r in results if r["true"]=="FAIL" and r["pred"]!="FAIL") +prec = tp/(tp+fp) if tp+fp > 0 else 0 +rec = tp/(tp+fn) if tp+fn > 0 else 0 +print(f"\nFAIL detection — Precision: {prec:.2f} Recall: {rec:.2f}") +``` + +--- + +## PART 4: Full End-to-End System Tests + +| Test Scenario | Steps | Pass Criteria | +|---------------|-------|---------------| +| Full gauge inspection | Physical gauge → Insta360 detects → servo moves → IBVS → capture → send to server → reading | Reading within 10% of true value | +| Front zone detection | Object in front (cy < 200px) | `success=True, object_in_back=False` | +| Back zone detection | Object behind robot | `success=False, object_in_back=True` → BT rotates | +| Multi-object (2 objects) | 2 objects in Insta360 view | Both inspected, 2 ROI sets captured | +| Object disappears during IBVS | Remove object after coarse | `failed_reason="ibvs_timeout"` | +| Empty scene | No objects in view | `failed_reason="no_detection"` | +| MQTT delivery | Full pipeline with internet | ThingsBoard shows telemetry with image | +| Unknown object → VLM routing | Send image with `object_type=unknown` | VLM correctly identifies and inspects | + +--- + +## PART 5: Summary Results Tables + +Fill these in after running evaluations: + +### Table 1 — Capture Pipeline + +| Object | Distance | Conv. Time | Final Error | Success % | SSIM | PSNR | AlexNet Sim | +|--------|----------|-----------|-------------|-----------|------|------|-------------| +| Gauge | 1m | __ s | __ px | _% | __ | __ dB | __ | +| Gauge | 2m | __ s | __ px | _% | __ | __ dB | __ | +| Gauge | 3m | __ s | __ px | _% | __ | __ dB | __ | +| Fire ext | 1m | __ s | __ px | _% | __ | __ dB | __ | +| Fire ext | 2m | __ s | __ px | _% | __ | __ dB | __ | + +### Table 2 — Occlusion Robustness + +| Occlusion | YOLO Conf | IBVS Success % | SSIM | AlexNet Sim | +|-----------|----------|----------------|------|-------------| +| 0% | __ | _% | __ | __ | +| 25% | __ | _% | __ | __ | +| 50% | __ | _% | __ | __ | +| 75% | __ | _% | __ | __ | + +### Table 3 — Gauge Reading Accuracy + +| Metric | Value | +|--------|-------| +| N images tested | __ | +| Success rate | _% | +| MAE | __ units | +| RMSE | __ units | +| MAPE | _% | +| Avg processing time | __ s | + +### Table 4 — VLM Decision Accuracy + +| Object Type | N | Accuracy | Precision | Recall | F1 | BERTScore F1 | LLM Judge CORRECT% | +|-------------|---|----------|-----------|--------|----|--------------|--------------------| +| fire_extinguisher | __ | _% | __ | __ | __ | __ | _% | +| emergency_exit | __ | _% | __ | __ | __ | __ | _% | +| main_cylinder | __ | _% | __ | __ | __ | __ | _% | +| door | __ | _% | __ | __ | __ | __ | _% | +| unknown | __ | _% | __ | __ | __ | __ | _% | +| **Overall** | __ | _% | __ | __ | __ | __ | _% | + +--- + +## Dependencies to Install + +```bash +# On server (vi_server venv) +pip install scikit-image # SSIM, PSNR +pip install piq # VIF +pip install torch torchvision # AlexNet cosine similarity +pip install bert-score # BERTScore +pip install google-generativeai # LLM-as-judge (already installed) + +# On Jetson (for capture eval logging) +# No extra installs — uses standard library (csv, time, datetime) +``` + +--- + +## Files to Create + +| File | Location | Purpose | +|------|----------|---------| +| `evaluate_gauge.py` | `vi_server/` | Gauge accuracy evaluation script | +| `evaluate_vlm.py` | `vi_server/` | VLM decision accuracy evaluation script | +| `evaluate_image_quality.py` | `vi_server/` | SSIM, PSNR, VIF, YCBCR, AlexNet for captured images | +| `evaluate_vlm_llm_judge.py` | `vi_server/` | LLM-as-judge scoring for VLM outputs | +| `gauge_eval_results.csv` | `vi_server/` | Raw gauge results | +| `vlm_eval_results.csv` | `vi_server/` | Raw VLM results | +| `capture_eval.csv` | Jetson `~/eval_results/` | IBVS convergence metrics per run | + +--- + +*Plan version: 2026-03-12 | Author: Dinethra | Robot: Go2 + Jetson Orin Nano* \ No newline at end of file diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/laptop_receiver.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/laptop_receiver.py new file mode 100644 index 00000000..a2e143fe --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/laptop_receiver.py @@ -0,0 +1,133 @@ +""" +laptop_receiver.py — HTTP server that runs on YOUR LAPTOP +========================================================= +Receives images + metadata.json from the Jetson via HTTP POST +and saves them in an organized folder structure. + +HOW TO RUN (on your laptop, connected to YasiruDEX network): + pip install flask + python3 laptop_receiver.py + +HOW TO FIND YOUR LAPTOP IP (on YasiruDEX network): + ip route get 8.8.8.8 | awk '{print $7; exit}' + # → something like 192.168.1.XXX — use this in the Jetson's laptop_url param + +SAVED FOLDER STRUCTURE (on laptop): + received_captures/ + ├── engine_room_A/ ← session_label from BT + │ ├── gauge/ + │ │ └── instance_1/ + │ │ ├── img_01_conf0.85.jpg + │ │ ├── img_02_conf0.85.jpg + │ │ ├── img_03_conf0.85.jpg + │ │ └── metadata.json + │ └── extinguisher/ + │ └── instance_1/ + │ ├── img_01_conf0.90.jpg + │ └── metadata.json +""" + +import os +import json +import pathlib +from datetime import datetime +from flask import Flask, request, jsonify + +# ── Config ──────────────────────────────────────────────────────────────────── +PORT = 8888 +SAVE_ROOT = pathlib.Path(__file__).parent / 'received_captures' +# ───────────────────────────────────────────────────────────────────────────── + +app = Flask(__name__) + + +@app.route('/upload', methods=['POST']) +def upload(): + """ + Receives files from the Jetson image_uploader node. + Form fields: session_label, subfolder, object_class + Files: images + metadata.json + + Folder structure on laptop: + received_captures//// + + location_label is read directly from the metadata.json content. + Falls back to session_label form field if metadata.json not present. + """ + session_label = request.form.get('session_label', 'unknown_session') + subfolder = request.form.get('subfolder', 'instance_1') + object_class = request.form.get('object_class', 'unknown') + + if not request.files: + return jsonify({'success': False, 'info': 'No files in request'}), 400 + + # ── Collect all files in memory first so we can read metadata.json ──────── + collected = {} + for _, file in request.files.items(multi=True): + if file.filename == '': + continue + collected[file.filename] = file.read() + + # ── Extract location_label from metadata.json (if present) ──────────────── + location_label = session_label # default fallback + if 'metadata.json' in collected: + try: + meta = json.loads(collected['metadata.json'].decode('utf-8')) + location_label = meta.get('location_label', session_label) or session_label + object_class = meta.get('class', object_class) + except Exception as e: + print(f'[!] Could not parse metadata.json: {e}') + + # ── Build save path ─────────────────────────────────────────────────────── + # received_captures//// + dest = SAVE_ROOT / location_label / object_class / subfolder + dest.mkdir(parents=True, exist_ok=True) + + # ── Save all files ──────────────────────────────────────────────────────── + saved_files = [] + for filename, data in collected.items(): + save_path = dest / filename + save_path.write_bytes(data) + saved_files.append(filename) + + print(f'[✓] Saved {len(saved_files)} files → {dest}') + print(f' Location: {location_label} Class: {object_class}') + print(f' Files: {saved_files}') + + return jsonify({ + 'success': True, + 'saved_count': len(saved_files), + 'saved_to': str(dest), + 'location_label': location_label, + 'files': saved_files + }), 200 + + + +@app.route('/status', methods=['GET']) +def status(): + """Health-check endpoint — Jetson can ping this to verify server is up.""" + folders = list(SAVE_ROOT.rglob('instance_*')) if SAVE_ROOT.exists() else [] + return jsonify({ + 'status': 'running', + 'save_root': str(SAVE_ROOT), + 'instances': len(folders) + }), 200 + + +if __name__ == '__main__': + SAVE_ROOT.mkdir(parents=True, exist_ok=True) + print('=' * 60) + print(' Visual Inspection — Laptop Receiver') + print('=' * 60) + print(f' Saves to : {SAVE_ROOT.resolve()}') + print(f' Port : {PORT}') + print() + print(' Find your IP (run in another terminal):') + print(' ip route get 8.8.8.8 | awk \'{print $7; exit}\'') + print() + print(' Then on Jetson, run image_uploader with:') + print(' ros2 run visual_inspection_ros image_uploader \\') + print(' --ros-args -p laptop_url:=http://:8888/upload') + print('=' * 60) + app.run(host='0.0.0.0', port=PORT, debug=False) diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_gauge.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_gauge.py new file mode 100644 index 00000000..bc1f52b5 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_gauge.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +""" +evaluate_gauge.py +Run on LAPTOP. Requires vi_server running (locally or on server). + +Sends each gauge evaluation image to the server and compares +predicted reading to the true reading in your capture_log.csv. + +Usage: + python3 scripts/evaluate_gauge.py \ + --images eval_dataset/gauge_accuracy/ \ + --log eval_dataset/capture_log.csv \ + --server http://localhost:8001 \ + --out results/gauge_eval_results.csv +""" + +import argparse, csv, json, time, os, statistics, sys +from pathlib import Path +import requests + +def poll_job(server, job_id, timeout=120): + deadline = time.time() + timeout + while time.time() < deadline: + try: + r = requests.get(f'{server}/api/v1/jobs/{job_id}', timeout=10) + job = r.json() + if job.get('status') in ['DONE', 'FAILED']: + return job + except Exception as e: + print(f' poll error: {e}') + time.sleep(2) + return {'status': 'TIMEOUT', 'result_json': '{}'} + +def send_image(server, img_path, object_type='gauge'): + with open(img_path, 'rb') as f: + r = requests.post(f'{server}/api/v1/jobs', + files={'file': ('image.jpg', f, 'image/jpeg')}, + data={'object_type': object_type}, + timeout=30) + return r.json().get('job_id') + +def load_ground_truth(log_csv, images_dir): + """Read ground_truth_value from capture_log.csv for each image filename.""" + gt = {} + if not Path(log_csv).exists(): + print(f'WARNING: capture_log.csv not found at {log_csv}') + print(' Will use filename to parse true value. Name images like: gauge_2.5bar_dist2m_n1.jpg') + return gt + with open(log_csv, newline='') as f: + for row in csv.DictReader(f): + fname = row.get('filename', '') + true_val = row.get('ground_truth_value', '') + if fname and true_val and true_val not in ('', 'N/A', 'PASS', 'FAIL'): + try: + gt[fname] = float(true_val) + except ValueError: + pass + return gt + +def parse_true_from_filename(fname): + """Fallback: parse true value from filename like gauge_2.5bar_dist2m_n1.jpg""" + import re + m = re.search(r'gauge_([0-9.]+)', fname) + if m: + return float(m.group(1)) + return None + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument('--images', required=True) + ap.add_argument('--log', default='eval_dataset/capture_log.csv') + ap.add_argument('--server', default='http://localhost:8001') + ap.add_argument('--out', default='results/gauge_eval_results.csv') + args = ap.parse_args() + + img_dir = Path(args.images) + out_path = Path(args.out) + out_path.parent.mkdir(parents=True, exist_ok=True) + + images = sorted(img_dir.glob('*.jpg')) + sorted(img_dir.glob('*.png')) + if not images: + print(f'No images found in {img_dir}'); sys.exit(1) + + gt = load_ground_truth(args.log, img_dir) + print(f'\nGauge Evaluation: {len(images)} images | server={args.server}') + + results = [] + for img_path in images: + fname = img_path.name + true_val = gt.get(fname) or parse_true_from_filename(fname) + if true_val is None: + print(f' SKIP {fname} — no ground truth value found') + continue + + print(f' Sending {fname} (true={true_val})...', end='', flush=True) + t0 = time.time() + try: + job_id = send_image(args.server, img_path) + job = poll_job(args.server, job_id) + elapsed = round(time.time() - t0, 1) + + if job['status'] == 'DONE': + data = json.loads(job.get('result_json', '{}')) + pred = data.get('reading') + unit = data.get('unit', '') + err = abs(pred - true_val) if pred is not None else None + ok = err is not None and err < (true_val * 0.05 + 0.1) + print(f' pred={pred} {unit} err={err} {"✓" if ok else "✗"}') + else: + pred, unit, err, ok = None, None, None, False + print(f' {job["status"]}') + + except Exception as e: + print(f' ERROR: {e}') + pred, unit, err, ok, elapsed = None, None, None, False, 0 + + results.append({'filename': fname, 'true_value': true_val, + 'predicted': pred, 'unit': unit, 'abs_error': err, + 'within_5pct': ok, 'time_s': elapsed}) + + if not results: + print('No results collected.'); sys.exit(1) + + with open(out_path, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=results[0].keys()) + writer.writeheader() + writer.writerows(results) + + good = [r for r in results if r['abs_error'] is not None] + errors = [r['abs_error'] for r in good] + bias = [r['predicted'] - r['true_value'] for r in good if r['predicted']] + + print(f'\n=== GAUGE EVALUATION SUMMARY ===') + print(f'Total images: {len(results)}') + print(f'Successful reads:{len(good)} ({len(good)/len(results)*100:.0f}%)') + if errors: + mae = statistics.mean(errors) + rmse = (sum(e**2 for e in errors)/len(errors))**0.5 + mape = statistics.mean(e/r['true_value']*100 for r,e in zip(good,errors) if r['true_value']) + print(f'MAE: {mae:.4f} (target < 5% full range)') + print(f'RMSE: {rmse:.4f}') + print(f'MAPE: {mape:.2f}%') + print(f'Bias: {statistics.mean(bias):.4f} (near 0 = no systematic error)') + print(f'Avg time/image: {statistics.mean(r["time_s"] for r in results):.1f}s') + print(f'Results saved to:{out_path}') + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_ibvs.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_ibvs.py new file mode 100644 index 00000000..243fd81d --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_ibvs.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +""" +evaluate_ibvs.py +Run on LAPTOP. Reads terminal output log OR reads capture_log.csv +to compute IBVS convergence time statistics. + +Usage Option 1 — from capture_log.csv: + python3 scripts/evaluate_ibvs.py \ + --log eval_dataset/capture_log.csv \ + --out results/ibvs_eval_results.csv + +Usage Option 2 — if you saved terminal output to a file: + python3 scripts/evaluate_ibvs.py \ + --terminal_log terminal_output.txt \ + --out results/ibvs_eval_results.csv +""" + +import argparse, csv, re, sys, statistics +from pathlib import Path +from collections import defaultdict + +def load_from_csv(log_path): + """Load IBVS metrics from capture_log.csv.""" + rows = [] + with open(log_path, newline='') as f: + for row in csv.DictReader(f): + try: + ibvs_t = float(row.get('ibvs_time_s', '')) if row.get('ibvs_time_s','').strip() else None + err_px = float(row.get('final_error_px','')) if row.get('final_error_px','').strip() else None + conv = row.get('converged','').strip().lower() in ('true','yes','1') + rows.append({ + 'folder': row.get('folder',''), + 'filename': row.get('filename',''), + 'object': row.get('object_type',''), + 'distance': row.get('distance_m',''), + 'angle': row.get('angle_deg',''), + 'direction': row.get('angle_direction',''), + 'occlusion': row.get('occlusion_pct',''), + 'ibvs_s': ibvs_t, + 'err_px': err_px, + 'converged': conv + }) + except Exception: + continue + return rows + +def parse_terminal_log(log_path): + """Parse ibvs_time and final error from saved terminal output.""" + rows = [] + text = Path(log_path).read_text() + # Look for lines like: IBVS converged: err=8.1px at iter 42 + for m in re.finditer(r'IBVS converged: err=([\d.]+)px at iter (\d+)', text): + rows.append({'err_px': float(m.group(1)), 'iters': int(m.group(2)), + 'converged': True, 'ibvs_s': None}) + # Look for timeout lines + for m in re.finditer(r'IBVS timeout', text): + rows.append({'err_px': None, 'iters': None, 'converged': False, 'ibvs_s': None}) + return rows + +def summarize(rows, group_by=None): + if group_by: + groups = defaultdict(list) + for r in rows: + groups[r.get(group_by,'unknown')].append(r) + else: + groups = {'all': rows} + + print(f'\n {"Group":<20} {"N":>4} {"Conv%":>6} {"AvgTime":>8} {"AvgErr":>8} {"MaxErr":>8}') + print(f' {"-"*20} {"-"*4} {"-"*6} {"-"*8} {"-"*8} {"-"*8}') + + for key, grp in sorted(groups.items()): + n = len(grp) + conv = [r for r in grp if r['converged']] + conv_r = len(conv)/n*100 if n else 0 + times = [r['ibvs_s'] for r in conv if r.get('ibvs_s')] + errors = [r['err_px'] for r in conv if r.get('err_px')] + avg_t = f'{statistics.mean(times):.1f}s' if times else 'N/A' + avg_e = f'{statistics.mean(errors):.1f}px' if errors else 'N/A' + max_e = f'{max(errors):.1f}px' if errors else 'N/A' + print(f' {str(key):<20} {n:>4} {conv_r:>5.0f}% {avg_t:>8} {avg_e:>8} {max_e:>8}') + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument('--log', default='eval_dataset/capture_log.csv') + ap.add_argument('--terminal_log', default=None) + ap.add_argument('--out', default='results/ibvs_eval_results.csv') + ap.add_argument('--group_by', choices=['distance','angle','occlusion','object'], default=None) + args = ap.parse_args() + out_path = Path(args.out) + out_path.parent.mkdir(parents=True, exist_ok=True) + + if args.terminal_log and Path(args.terminal_log).exists(): + rows = parse_terminal_log(args.terminal_log) + print(f'Loaded {len(rows)} IBVS runs from terminal log') + elif Path(args.log).exists(): + rows = load_from_csv(args.log) + rows = [r for r in rows if r['ibvs_s'] or r['err_px']] + print(f'Loaded {len(rows)} rows with IBVS data from capture_log.csv') + else: + print(f'No data source found. Provide --log or --terminal_log'); sys.exit(1) + + if not rows: print('No rows with IBVS data found.'); sys.exit(1) + + print(f'\n=== IBVS CONVERGENCE EVALUATION ===') + + # Overall stats + conv = [r for r in rows if r['converged']] + rate = len(conv)/len(rows)*100 + times = [r['ibvs_s'] for r in conv if r.get('ibvs_s')] + errs = [r['err_px'] for r in conv if r.get('err_px')] + print(f'Total runs: {len(rows)}') + print(f'Converged: {len(conv)} ({rate:.0f}%) (target > 90%)') + if times: print(f'Avg IBVS time: {statistics.mean(times):.2f}s (target < 5s)') + if errs: print(f'Avg final error: {statistics.mean(errs):.2f}px (target < 10px)') + if errs: print(f'Max final error: {max(errs):.2f}px') + + if args.group_by: + print(f'\nBreakdown by {args.group_by}:') + summarize(rows, group_by=args.group_by) + + # Save + if rows and isinstance(rows[0], dict): + with open(out_path, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=rows[0].keys()) + writer.writeheader() + writer.writerows(rows) + print(f'\nResults saved to: {out_path}') + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_image_quality.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_image_quality.py new file mode 100644 index 00000000..b45dfde2 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_image_quality.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +""" +evaluate_image_quality.py +Run on LAPTOP after copying images from Jetson. + +Computes for each test image vs reference image: + - SSIM (structural similarity) + - PSNR (peak signal-to-noise ratio) + - YCbCr luminance comparison + - VIF (visual information fidelity) + - AlexNet cosine similarity + +Usage: + python3 scripts/evaluate_image_quality.py \ + --ref eval_dataset/reference/fire_ext_ref_01.jpg \ + --test eval_dataset/angle_eval/horizontal/30deg_L/ \ + --out results/image_quality_30deg_L.csv +""" + +import argparse, csv, sys, os +from pathlib import Path +import cv2 +import numpy as np + +def load_pair(ref_path, test_path, size=(640, 480)): + ref = cv2.imread(str(ref_path)) + test = cv2.imread(str(test_path)) + if ref is None: raise FileNotFoundError(f'Cannot read ref: {ref_path}') + if test is None: raise FileNotFoundError(f'Cannot read test: {test_path}') + test_r = cv2.resize(test, (ref.shape[1], ref.shape[0])) + return ref, test_r + +def compute_ssim(ref, test): + from skimage.metrics import structural_similarity as ssim_fn + ref_g = cv2.cvtColor(ref, cv2.COLOR_BGR2GRAY) + test_g = cv2.cvtColor(test, cv2.COLOR_BGR2GRAY) + score, _ = ssim_fn(ref_g, test_g, full=True) + return round(float(score), 4) + +def compute_psnr(ref, test): + val = cv2.PSNR(ref, test) + return round(float(val), 2) + +def compute_ycbcr(ref, test): + ref_y = cv2.cvtColor(ref, cv2.COLOR_BGR2YCrCb)[:,:,0] + test_y = cv2.cvtColor(test, cv2.COLOR_BGR2YCrCb)[:,:,0] + from skimage.metrics import structural_similarity as ssim_fn + y_ssim, _ = ssim_fn(ref_y, test_y, full=True) + y_diff = abs(float(ref_y.mean()) - float(test_y.mean())) + return round(float(y_ssim), 4), round(y_diff, 2), round(float(test_y.mean()), 1) + +def compute_vif(ref, test): + try: + import piq, torch + def to_t(img): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype('float32') / 255.0 + return torch.from_numpy(img).permute(2,0,1).unsqueeze(0) + score = piq.vif_p(to_t(ref), to_t(test), data_range=1.0) + return round(float(score.item()), 4) + except Exception as e: + print(f' [VIF] skipped: {e}') + return None + +def compute_alexnet(ref_path, test_path): + try: + import torch, torchvision.models as models, torchvision.transforms as T + import torch.nn.functional as F + from PIL import Image + + alexnet = models.alexnet(weights='IMAGENET1K_V1') + extractor = torch.nn.Sequential(*list(alexnet.features.children())) + extractor.eval() + pre = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor(), + T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])]) + def feat(p): + img = Image.open(p).convert('RGB') + with torch.no_grad(): + return extractor(pre(img).unsqueeze(0)).flatten() + cos = F.cosine_similarity(feat(ref_path).unsqueeze(0), + feat(test_path).unsqueeze(0)) + return round(float(cos.item()), 4) + except Exception as e: + print(f' [AlexNet] skipped: {e}') + return None + +def eval_one(ref_path, test_path): + ref, test = load_pair(ref_path, test_path) + ssim = compute_ssim(ref, test) + psnr = compute_psnr(ref, test) + y_ssim, y_diff, y_mean = compute_ycbcr(ref, test) + vif = compute_vif(ref, test) + alex = compute_alexnet(ref_path, test_path) + + status = 'PASS' + if ssim is not None and ssim < 0.70: status = 'FAIL' + if psnr is not None and psnr < 30: status = 'FAIL' + if vif is not None and vif < 0.40: status = 'FAIL' + if alex is not None and alex < 0.85: status = 'WARN' + + print(f' {"✓" if status=="PASS" else "✗"} {Path(test_path).name} ' + f'SSIM={ssim} PSNR={psnr}dB VIF={vif} AlexNet={alex} → {status}') + return { + 'test_image': str(test_path), + 'ssim': ssim, 'psnr': psnr, + 'y_ssim': y_ssim, 'y_mean': y_mean, 'y_brightness_diff': y_diff, + 'vif': vif, 'alexnet_cos': alex, 'status': status + } + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument('--ref', required=True, help='Reference image path') + ap.add_argument('--test', required=True, help='Test image or folder of images') + ap.add_argument('--out', default='results/image_quality.csv') + args = ap.parse_args() + + ref_path = Path(args.ref) + test_path = Path(args.test) + out_path = Path(args.out) + out_path.parent.mkdir(parents=True, exist_ok=True) + + if not ref_path.exists(): + print(f'ERROR: reference not found: {ref_path}'); sys.exit(1) + + if test_path.is_dir(): + test_files = sorted(test_path.glob('*.jpg')) + sorted(test_path.glob('*.png')) + else: + test_files = [test_path] + + if not test_files: + print(f'No images found in {test_path}'); sys.exit(1) + + print(f'\nEvaluating {len(test_files)} image(s) vs reference: {ref_path.name}') + results = [] + for f in test_files: + try: + results.append(eval_one(ref_path, f)) + except Exception as e: + print(f' ERROR {f.name}: {e}') + + if results: + with open(out_path, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=results[0].keys()) + writer.writeheader() + writer.writerows(results) + + # Summary + good = [r for r in results if r['ssim'] is not None] + print(f'\n=== SUMMARY ===') + print(f'Images evaluated: {len(results)}') + print(f'Avg SSIM: {sum(r["ssim"] for r in good)/len(good):.4f} (target > 0.70)') + print(f'Avg PSNR: {sum(r["psnr"] for r in good)/len(good):.2f} dB (target > 30)') + if any(r["vif"] for r in good): + vifl = [r["vif"] for r in good if r["vif"]] + print(f'Avg VIF: {sum(vifl)/len(vifl):.4f} (target > 0.40)') + if any(r["alexnet_cos"] for r in good): + all_ = [r["alexnet_cos"] for r in good if r["alexnet_cos"]] + print(f'Avg AlexNet Sim: {sum(all_)/len(all_):.4f} (target > 0.85)') + print(f'Results saved to: {out_path}') + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_vlm.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_vlm.py new file mode 100644 index 00000000..0c7d3b8b --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_vlm.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +""" +evaluate_vlm.py +Run on LAPTOP. Requires vi_server running. + +Evaluates VLM PASS/FAIL decision accuracy for fire_extinguisher, +door, emergency_exit, main_cylinder, unknown object types. + +Usage: + python3 scripts/evaluate_vlm.py \ + --base eval_dataset/vlm_eval/ \ + --log eval_dataset/capture_log.csv \ + --server http://localhost:8001 \ + --out results/vlm_eval_results.csv + +Folder → object_type + expected decision mapping: + fire_ext_pass/ → fire_extinguisher PASS + fire_ext_fail/ → fire_extinguisher FAIL + exit_pass/ → emergency_exit PASS + exit_fail/ → emergency_exit FAIL + door_pass/ → door PASS + door_fail/ → door FAIL + cylinder_pass/ → main_cylinder PASS + cylinder_fail/ → main_cylinder FAIL + unknown_various/ → unknown None (just check routing) +""" + +import argparse, csv, json, time, sys, statistics +from pathlib import Path +from collections import defaultdict +import requests + +FOLDER_MAP = { + 'fire_ext_pass': ('fire_extinguisher', 'PASS'), + 'fire_ext_fail': ('fire_extinguisher', 'FAIL'), + 'exit_pass': ('emergency_exit', 'PASS'), + 'exit_fail': ('emergency_exit', 'FAIL'), + 'door_pass': ('door', 'PASS'), + 'door_fail': ('door', 'FAIL'), + 'cylinder_pass': ('main_cylinder', 'PASS'), + 'cylinder_fail': ('main_cylinder', 'FAIL'), + 'unknown_various':('unknown', None), +} + +def poll_job(server, job_id, timeout=120): + deadline = time.time() + timeout + while time.time() < deadline: + try: + r = requests.get(f'{server}/api/v1/jobs/{job_id}', timeout=10) + job = r.json() + if job.get('status') in ['DONE', 'FAILED']: + return job + except Exception: + pass + time.sleep(2) + return {'status': 'TIMEOUT', 'result_json': '{}'} + +def load_captions(log_csv): + captions = {} + if not Path(log_csv).exists(): + return captions + with open(log_csv, newline='') as f: + for row in csv.DictReader(f): + fname = row.get('filename', '').strip() + note = row.get('notes', '').strip() + true = row.get('ground_truth_value', '').strip() + if fname: + captions[fname] = {'caption': note, 'true_decision': true} + return captions + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument('--base', required=True, help='Base folder: eval_dataset/vlm_eval/') + ap.add_argument('--log', default='eval_dataset/capture_log.csv') + ap.add_argument('--server', default='http://localhost:8001') + ap.add_argument('--out', default='results/vlm_eval_results.csv') + ap.add_argument('--consistency', action='store_true', + help='Run each image 3x to test consistency') + args = ap.parse_args() + + base = Path(args.base) + out_path = Path(args.out) + out_path.parent.mkdir(parents=True, exist_ok=True) + captions = load_captions(args.log) + + all_results = [] + + for folder_name, (obj_type, expected) in FOLDER_MAP.items(): + folder = base / folder_name + if not folder.exists(): + print(f' [skip] {folder_name}/ not found') + continue + + images = sorted(folder.glob('*.jpg')) + sorted(folder.glob('*.png')) + if not images: + continue + + print(f'\n--- {folder_name}/ ({obj_type}, expected={expected}) --- {len(images)} images') + + for img_path in images: + fname = img_path.name + cap_info = captions.get(fname, {}) + + runs = 3 if args.consistency else 1 + decisions = [] + for run in range(runs): + t0 = time.time() + try: + with open(img_path, 'rb') as f: + r = requests.post(f'{args.server}/api/v1/jobs', + files={'file': ('image.jpg', f, 'image/jpeg')}, + data={'object_type': obj_type}, timeout=30) + job_id = r.json().get('job_id') + job = poll_job(args.server, job_id) + elapsed = round(time.time() - t0, 1) + + if job['status'] == 'DONE': + data = json.loads(job.get('result_json', '{}')) + decision = data.get('decision', 'UNKNOWN') + conf = data.get('confidence', 0.0) + summary = data.get('summary', '')[:120] + else: + decision, conf, summary = 'ERROR', 0.0, '' + + decisions.append(decision) + + if run == 0: + correct = (expected is None) or (decision == expected) + print(f' {"✓" if correct else "✗"} {fname}: ' + f'{expected}→{decision} ({conf:.2f}) [{elapsed}s]') + + all_results.append({ + 'folder': folder_name, 'filename': fname, + 'object_type': obj_type, 'expected': expected or 'N/A', + 'predicted': decision, 'confidence': conf, + 'correct': correct, 'time_s': elapsed, + 'summary': summary, + 'caption': cap_info.get('caption', '') + }) + except Exception as e: + print(f' ERROR {fname}: {e}') + + if args.consistency and len(decisions) == 3: + consistent = len(set(decisions)) == 1 + all_results[-1]['consistency'] = consistent + all_results[-1]['decisions_3x'] = str(decisions) + print(f' Consistency: {decisions} → {"✓" if consistent else "✗ INCONSISTENT"}') + + if not all_results: + print('No results.'); sys.exit(1) + + with open(out_path, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=all_results[0].keys()) + writer.writeheader() + writer.writerows(all_results) + + # --- Summary --- + print(f'\n=== VLM EVALUATION SUMMARY ===') + by_type = defaultdict(list) + for r in all_results: + by_type[r['object_type']].append(r) + + for t, rows in sorted(by_type.items()): + labeled = [r for r in rows if r['expected'] != 'N/A'] + acc = sum(r['correct'] for r in labeled) / len(labeled) * 100 if labeled else 0 + avg_t = statistics.mean(r['time_s'] for r in rows) + print(f' {t:20s}: {acc:.0f}% accuracy | avg {avg_t:.1f}s/image | n={len(rows)}') + + labeled_all = [r for r in all_results if r['expected'] != 'N/A'] + if labeled_all: + tp = sum(1 for r in labeled_all if r['expected']=='FAIL' and r['predicted']=='FAIL') + fp = sum(1 for r in labeled_all if r['expected']!='FAIL' and r['predicted']=='FAIL') + fn = sum(1 for r in labeled_all if r['expected']=='FAIL' and r['predicted']!='FAIL') + prec = tp/(tp+fp) if (tp+fp) > 0 else 0 + rec = tp/(tp+fn) if (tp+fn) > 0 else 0 + f1 = 2*prec*rec/(prec+rec) if (prec+rec) > 0 else 0 + overall = sum(r['correct'] for r in labeled_all)/len(labeled_all)*100 + print(f'\n Overall accuracy: {overall:.0f}%') + print(f' FAIL class — Precision: {prec:.2f} Recall: {rec:.2f} F1: {f1:.2f}') + print(f' (Recall target > 0.90 — must not miss failures)') + print(f'\n Results saved to: {out_path}') + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_vlm_llm_judge.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_vlm_llm_judge.py new file mode 100644 index 00000000..f41c57f6 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/evaluate_vlm_llm_judge.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +""" +evaluate_vlm_llm_judge.py +Run on LAPTOP. Requires Google Gemini API key. + +Uses Gemini 1.5 Pro as a judge to evaluate whether the VLM's +inspection report is CORRECT / PARTIALLY_CORRECT / INCORRECT +compared to a human-written ground truth caption. + +Usage: + export GEMINI_API_KEY=your_key_here + python3 scripts/evaluate_vlm_llm_judge.py \ + --results results/vlm_eval_results.csv \ + --images eval_dataset/vlm_eval/ \ + --out results/llm_judge_results.csv +""" + +import argparse, csv, json, os, sys, time +from pathlib import Path + +def judge_one(judge_model, image_path, caption, vlm_output): + import google.generativeai as genai + from PIL import Image + + img = Image.open(image_path) + prompt = f"""You are an expert evaluator for an industrial visual inspection AI system. + +GROUND TRUTH (what a human expert says about this image): +"{caption}" + +VLM INSPECTION REPORT (what the AI system produced): +Decision: {vlm_output.get('predicted', 'N/A')} +Expected: {vlm_output.get('expected', 'N/A')} +Confidence: {vlm_output.get('confidence', 'N/A')} +Summary: {vlm_output.get('summary', 'N/A')} + +Your task: +1. Look at the image +2. Compare the AI report against the ground truth caption +3. Rate: CORRECT / PARTIALLY_CORRECT / INCORRECT +4. Explain specifically what is right and what is wrong + +Respond ONLY in valid JSON (no markdown): +{{"rating": "CORRECT|PARTIALLY_CORRECT|INCORRECT", "explanation": "...", "decision_correct": true|false, "findings_accurate": true|false}} +""" + resp = judge_model.generate_content([prompt, img]) + text = resp.text.strip() + # Strip markdown code block if present + if text.startswith('```'): + text = text.split('```')[1] + if text.startswith('json'): + text = text[4:] + return json.loads(text.strip()) + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument('--results', required=True, help='vlm_eval_results.csv from evaluate_vlm.py') + ap.add_argument('--images', required=True, help='Base folder: eval_dataset/vlm_eval/') + ap.add_argument('--out', default='results/llm_judge_results.csv') + args = ap.parse_args() + + api_key = os.environ.get('GEMINI_API_KEY', '') + if not api_key: + print('ERROR: Set GEMINI_API_KEY environment variable') + sys.exit(1) + + import google.generativeai as genai + genai.configure(api_key=api_key) + judge = genai.GenerativeModel('gemini-1.5-pro') + + results_path = Path(args.results) + images_base = Path(args.images) + out_path = Path(args.out) + out_path.parent.mkdir(parents=True, exist_ok=True) + + with open(results_path, newline='') as f: + rows = list(csv.DictReader(f)) + + # Only judge rows that have a caption + to_judge = [r for r in rows if r.get('caption', '').strip()] + print(f'\nLLM Judge: {len(to_judge)}/{len(rows)} rows have captions to judge') + + judge_results = [] + for i, row in enumerate(to_judge): + folder = row['folder'] + fname = row['filename'] + caption = row['caption'] + img_path = images_base / folder / fname + + if not img_path.exists(): + print(f' SKIP {fname} — image not found at {img_path}') + continue + + print(f' [{i+1}/{len(to_judge)}] Judging {fname}...', end='', flush=True) + try: + verdict = judge_one(judge, img_path, caption, row) + rating = verdict.get('rating', 'ERROR') + print(f' {rating}') + judge_results.append({ + 'folder': folder, 'filename': fname, + 'object_type': row['object_type'], + 'expected': row['expected'], 'predicted': row['predicted'], + 'caption': caption, + 'llm_rating': rating, + 'decision_correct': verdict.get('decision_correct'), + 'findings_accurate': verdict.get('findings_accurate'), + 'explanation': verdict.get('explanation', '')[:200] + }) + time.sleep(1) # Rate limit + except Exception as e: + print(f' ERROR: {e}') + judge_results.append({ + 'folder': folder, 'filename': fname, + 'object_type': row['object_type'], + 'expected': row['expected'], 'predicted': row['predicted'], + 'caption': caption, 'llm_rating': 'ERROR', + 'decision_correct': None, 'findings_accurate': None, + 'explanation': str(e) + }) + + if judge_results: + with open(out_path, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=judge_results[0].keys()) + writer.writeheader() + writer.writerows(judge_results) + + # Summary + from collections import Counter + counts = Counter(r['llm_rating'] for r in judge_results) + total = len(judge_results) + print(f'\n=== LLM JUDGE SUMMARY ===') + print(f'CORRECT: {counts["CORRECT"]}/{total} ({counts["CORRECT"]/total*100:.0f}%)') + print(f'PARTIALLY_CORRECT: {counts["PARTIALLY_CORRECT"]}/{total} ({counts["PARTIALLY_CORRECT"]/total*100:.0f}%)') + print(f'INCORRECT: {counts["INCORRECT"]}/{total} ({counts["INCORRECT"]/total*100:.0f}%)') + print(f'Results saved to: {out_path}') + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/requirements_eval.txt b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/requirements_eval.txt new file mode 100644 index 00000000..cc20661e --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/scripts/requirements_eval.txt @@ -0,0 +1,13 @@ +scikit-image +piq +torch +torchvision +bert-score +google-generativeai +Pillow +opencv-python +numpy +requests +pandas +matplotlib +seaborn diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/setup_eval_folders.sh b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/setup_eval_folders.sh new file mode 100644 index 00000000..3791bfec --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/setup_eval_folders.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# setup_eval_folders.sh +# Run once on Jetson to create all evaluation dataset folders +# Usage: bash ~/Documents/Visual_Inspection_ws/Evaluation_V_I_ws/setup_eval_folders.sh + +echo "Creating evaluation dataset folder structure..." + +BASE=~/eval_dataset + +# Reference images +mkdir -p $BASE/reference + +# Angle evaluation +mkdir -p $BASE/angle_eval/horizontal/{0deg,15deg_L,15deg_R,30deg_L,30deg_R,45deg_L,45deg_R} +mkdir -p $BASE/angle_eval/vertical/{0deg,15deg_up,15deg_down,30deg_up,30deg_down} + +# Distance evaluation +mkdir -p $BASE/distance_eval/{1m,2m,3m,4m} + +# Occlusion evaluation +mkdir -p $BASE/occlusion/{0pct,25pct,50pct,75pct} + +# Gauge accuracy ground truth +mkdir -p $BASE/gauge_accuracy + +# VLM evaluation images +mkdir -p $BASE/vlm_eval/{fire_ext_pass,fire_ext_fail,exit_pass,exit_fail,cylinder_pass,cylinder_fail,door_pass,door_fail,unknown_various} + +# IBVS convergence logs (auto-written by action server) +mkdir -p $BASE/ibvs_logs + +# Multi-object scenes +mkdir -p $BASE/multi_object/{2_objects,3_objects} + +# Create capture log CSV with header +LOG=$BASE/capture_log.csv +if [ ! -f "$LOG" ]; then + echo "timestamp,folder,filename,object_type,distance_m,angle_deg,angle_direction,occlusion_pct,n_objects,ibvs_time_s,final_error_px,converged,ground_truth_value,notes" > $LOG + echo "Created capture_log.csv with header" +else + echo "capture_log.csv already exists — not overwriting" +fi + +echo "" +echo "Done! Structure created at: $BASE" +echo "" +tree $BASE -d 2>/dev/null || find $BASE -type d | sort diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/sync_from_jetson.sh b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/sync_from_jetson.sh new file mode 100755 index 00000000..ceea7f86 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/Evaluation_V_I_ws/sync_from_jetson.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# sync_from_jetson.sh +# Pull ALL captured data from Jetson to local machine. +# +# Usage: bash sync_from_jetson.sh +# +# Syncs: +# Jetson: ~/Documents/Visual_Inspection_ws/evaluation/ -> local: Evaluation_V_I_ws/eval_dataset/ +# Jetson: ~/Documents/Visual_Inspection_ws/captures/ -> local: data/captures_from_jetson/ +# Jetson: ~/Documents/Visual_Inspection_ws/yolo_dataset/ -> local: data/yolo_dataset/ + +JETSON_USER="rgen" +JETSON_IP="192.168.8.181" +JETSON_BASE="~/Documents/Visual_Inspection_ws" + +LOCAL_BASE="$(cd "$(dirname "$0")/.." && pwd)" +LOCAL_EVAL="${LOCAL_BASE}/Evaluation_V_I_ws/eval_dataset" +LOCAL_CAPTURES="${LOCAL_BASE}/data/captures_from_jetson" +LOCAL_YOLO="${LOCAL_BASE}/data/yolo_dataset" + +echo "" +echo "======================================================" +echo " SYNC FROM JETSON -> LOCAL" +echo "======================================================" +echo " Jetson : ${JETSON_USER}@${JETSON_IP}:${JETSON_BASE}" +echo " Local : ${LOCAL_BASE}" +echo "" + +mkdir -p "${LOCAL_EVAL}" +mkdir -p "${LOCAL_CAPTURES}" +mkdir -p "${LOCAL_YOLO}" + +# 1. Evaluation dataset +echo "[ 1/3 ] Syncing evaluation/ ..." +rsync -avz --progress \ + "${JETSON_USER}@${JETSON_IP}:${JETSON_BASE}/evaluation/" \ + "${LOCAL_EVAL}/" +echo "" + +# 2. Captures (all pipeline + MQTT captures) +echo "[ 2/3 ] Syncing captures/ ..." +rsync -avz --progress \ + "${JETSON_USER}@${JETSON_IP}:${JETSON_BASE}/captures/" \ + "${LOCAL_CAPTURES}/" +echo "" + +# 3. YOLO training dataset +echo "[ 3/3 ] Syncing yolo_dataset/ ..." +rsync -avz --progress \ + "${JETSON_USER}@${JETSON_IP}:${JETSON_BASE}/yolo_dataset/" \ + "${LOCAL_YOLO}/" +echo "" + +# Summary +echo "======================================================" +echo " DONE" +echo "" +echo " Eval dataset : $(find "${LOCAL_EVAL}" -name '*.jpg' 2>/dev/null | wc -l) images" +echo " Captures : $(find "${LOCAL_CAPTURES}" -name '*.jpg' 2>/dev/null | wc -l) images" +echo " YOLO train data : $(find "${LOCAL_YOLO}" -name '*.jpg' 2>/dev/null | wc -l) images" +echo "======================================================" +echo "" diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/README.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/README.md index 08face51..5bf69687 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/README.md +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/README.md @@ -1,4 +1,10 @@ -# Visual Inspection System +# Visual Inspection System — Jetson Orin Nano + +> 📦 **Full development repository with complete commit history (108 commits, 3 months of work):** +> **[DinethraDivanjana2001/Visual_Inspection_system_Jetson_orin_nano](https://github.com/DinethraDivanjana2001/Visual_Inspection_system_Jetson_orin_nano)** +> +> This folder is integrated here as part of the Go2 Dynamic Inspection FYP project. +> Visit the link above to see the full development timeline, all commits, and contribution history. Research-grade visual inspection system using YOLOv11 + TensorRT on Jetson Orin Nano with panoramic detection and pan-tilt-zoom control. diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/DEMO_COMMANDS.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/DEMO_COMMANDS.md new file mode 100644 index 00000000..2633291e --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/DEMO_COMMANDS.md @@ -0,0 +1,370 @@ +# Visual Inspection System — Demo & Run Commands +**Jetson Orin Nano | Ubuntu 22.04 | ROS2 Humble** + +SSH into Jetson first (from laptop): +```bash +ssh rgen@192.168.8.181 +# password: ffddffdd +``` + +--- + +## QUICK REFERENCE — What mode to use + +| Mode | When to use | What you see | +|------|-------------|--------------| +| Headed Python | Demo to someone physically at the Jetson screen / VNC | Live camera window with bounding boxes | +| Headless Python | SSH terminal only, no display | FPS + IBVS error numbers in terminal | +| ROS2 + RViz | Full ROS integration demo | Topics + RViz2 debug view from laptop | +| Dataset collection | Evaluation data capture session | Pipeline auto-runs, images auto-logged | +| YOLO training data | Collect raw images for retraining | Both cameras preview side-by-side | + +--- + +## 1. PYTHON PIPELINE — HEADED MODE (visual window via VNC) + +**Use:** Connect to Jetson via VNC first, then open a terminal on the Jetson desktop. + +```bash +# Activate venv +source ~/Documents/Visual_Inspection_ws/venv/bin/activate + +# Run headed pipeline +cd ~/Documents/Visual_Inspection_ws/jetson_workspace +python3 ibvs_headed.py +``` + +**Optional shortcut (also frees cameras + sets max Jetson performance):** +```bash +cd ~/Documents/Visual_Inspection_ws/jetson_workspace +./run.sh +``` + +**What to watch:** +- Live side-by-side window: Insta360 (left) + Logitech (right) +- Green bounding boxes on detected objects (Insta360 view) +- Blue IBVS correction arrow + target crosshair (Logitech view) +- Status bar: `DETECTING → COARSE → IBVS → CAPTURING` +- IBVS pixel error displayed in real time (target < 10 px) + +--- + +## 2. PYTHON PIPELINE — HEADLESS MODE (terminal only, SSH) + +**Use:** When demonstrating over SSH without a screen. + +```bash +# Activate venv +source ~/Documents/Visual_Inspection_ws/venv/bin/activate + +# Run headless pipeline +cd ~/Documents/Visual_Inspection_ws/jetson_workspace +python3 ibvs_headless.py +``` + +**Optional shortcut:** +```bash +cd ~/Documents/Visual_Inspection_ws/jetson_workspace +./run.sh headless +``` + +**What to watch in terminal:** +``` +[Insta360] fire_extinguisher ID=1 cx=312 cy=180 zone=FRONT +[COARSE] pan=87 tilt=94 (polynomial mapping) +[IBVS] obj=1 err=142px → 84px → 31px → 9.3px CONVERGED +[CAPTURE] 4 images saved → captures/inspection/20260317_163000/ +[RESULT] success=True | objects_found=1 | objects_inspected=1 +``` + +--- + +## 3. ROS2 PIPELINE — 3 terminals + optional RViz + +### Terminal 1 — Camera node + +```bash +source /opt/ros/humble/setup.bash +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +ros2 run visual_inspection_ros camera_node +``` + +**Expected output:** +``` +[INFO] Camera node started +[INFO] Insta360 → /dev/video2 640x360 @ 30 Hz +[INFO] Logitech → /dev/video0 640x480 @ 30 Hz +``` + +### Terminal 2 — Servo node + +```bash +source /opt/ros/humble/setup.bash +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +ros2 run visual_inspection_ros servo_node +``` + +**Expected output:** +``` +[INFO] Servo node started — Arduino on /dev/ttyACM0 +[INFO] Home position: pan=90 tilt=90 +``` + +### Terminal 3 — IBVS action server + +```bash +source /opt/ros/humble/setup.bash +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +ros2 run visual_inspection_ros ibvs_action_server +``` + +**Expected output:** +``` +[INFO] IBVS action server ready +[INFO] Waiting for goals... +``` + +### Terminal 4 — Send inspection goal (triggers the pipeline) + +```bash +source /opt/ros/humble/setup.bash +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_full_pipeline.py +``` + +**Expected output:** +``` +[IBVS] obj=1 err=8.4px ....................... +RESULT: success=True | objects_found=1 | objects_inspected=1 +``` + +--- + +## 4. MONITOR ROS TOPICS (while pipeline is running) + +Open extra terminals alongside the 3 node terminals: + +```bash +source /opt/ros/humble/setup.bash + +# Live status (IDLE / DETECTING / COARSE / IBVS / CAPTURING) +ros2 topic echo /visual_inspection/status + +# Live IBVS pixel error (x, y, magnitude) — watch this converge +ros2 topic echo /visual_inspection/ibvs_error + +# Detection results (class, track_id, zone FRONT/BACK, bounding box) +ros2 topic echo /visual_inspection/detections + +# Pan/tilt servo commands being sent to Arduino +ros2 topic echo /servo/pan_tilt + +# List ALL active topics +ros2 topic list + +# Check publishing rate (e.g. camera at 30Hz?) +ros2 topic hz /visual_inspection/insta360/image_raw +ros2 topic hz /visual_inspection/logitech/image_raw +``` + +--- + +## 5. RVIZ2 — Live camera debug view (from laptop) + +RViz runs on your **laptop**, not Jetson. Set up ROS2 network between laptop and Jetson first: + +**On Jetson** (in each terminal, before sourcing ROS): +```bash +export ROS_DOMAIN_ID=42 +``` + +**On laptop:** +```bash +export ROS_DOMAIN_ID=42 +source /opt/ros/humble/setup.bash + +# Launch RViz2 +rviz2 +``` + +**In RViz2 — Add these displays:** +1. Click `Add` → `By topic` → `/visual_inspection/debug` → `Image` + - Shows: side-by-side Insta360 + Logitech with bounding boxes + IBVS arrow +2. Click `Add` → `By topic` → `/visual_inspection/insta360/image_raw` → `Image` +3. Click `Add` → `By topic` → `/visual_inspection/logitech/image_raw` → `Image` + +**Save the RViz config:** +```bash +# Save once set up — loads it next time +rviz2 -d ~/Documents/Visual_Inspection_ws/inspection_ws/inspection.rviz +``` + +--- + +## 6. EVALUATION DATASET COLLECTION + +**Runs on Jetson — requires ROS2 nodes running in T1, T2, T3 (see Section 3)** + +```bash +# Terminal 4 — start collection script +source /opt/ros/humble/setup.bash +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +source ~/Documents/Visual_Inspection_ws/venv/bin/activate +python3 ~/Documents/Visual_Inspection_ws/evaluation/collect_dataset.py +``` + +**Menu options:** +``` + 1: Reference images ← clean baseline captures at 1m, head-on + 2: Angle evaluation ← any angle (0°, 15°, 30°, 45°...) + 3: Distance evaluation ← any distance (0.75m, 1m, 1.5m, 2m...) + 4: Gauge ground truth ← known gauge reading for MAE/RMSE + 5: VLM PASS/FAIL images ← blocked/unblocked scenarios + 6: Occlusion evaluation ← 0%, 25%, 50%, 75% covered + 7: Multi-object scenes ← 2-3 objects in same frame + s: Show collection status ← image counts per folder + q: Quit +``` + +**What happens automatically for each capture:** +1. Pipeline runs (Insta360 → YOLO → coarse → IBVS → capture) +2. IBVS convergence time + final pixel error parsed from output +3. Image copied to the correct evaluation subfolder +4. Row logged to `capture_log.csv` + +**Check collected data:** +```bash +ls ~/Documents/Visual_Inspection_ws/evaluation/ +cat ~/Documents/Visual_Inspection_ws/evaluation/capture_log.csv +``` + +**Image output location on Jetson:** +``` +~/Documents/Visual_Inspection_ws/evaluation/ + reference/ ← reference images + distance_eval/1m/ ← distance test images + distance_eval/2m/ ← ...etc + angle_eval/... + occlusion/... +``` + +--- + +## 7. YOLO RETRAINING DATASET COLLECTION + +**No ROS needed — runs standalone on Jetson Jetson desktop / VNC** + +```bash +source ~/Documents/Visual_Inspection_ws/venv/bin/activate +cd ~/Documents/Visual_Inspection_ws +python3 collect_yolo_dataset.py +``` + +**Controls in the camera preview window:** + +| Key | Action | +|-----|--------| +| `←` `→` | Pan servo left / right | +| `↑` `↓` | Tilt servo up / down | +| `SPACE` | Capture both cameras simultaneously | +| `+` / `-` | Step size 1°–15° per keypress | +| `h` | Home (pan=90°, tilt=90°) | +| `c` | Change class | +| `q` | Quit | + +**Classes:** `1` = fire_extinguisher `2` = door `3` = gauge + +**Image output on Jetson:** +``` +~/Documents/Visual_Inspection_ws/yolo_dataset/ + fire_extinguisher/ + insta360/ img_0001.jpg img_0002.jpg ... + logitech/ img_0001.jpg img_0002.jpg ... + door/ + insta360/ ... + logitech/ ... + gauge/ + insta360/ ... + logitech/ ... +``` + +--- + +## 8. SYNC DATA FROM JETSON TO LAPTOP + +Run on **laptop** (not on Jetson): + +```bash +bash /home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws/sync_from_jetson.sh +``` + +**What it pulls:** + +| Jetson folder | Local destination | +|---------------|-------------------| +| `evaluation/` | `Evaluation_V_I_ws/eval_dataset/` | +| `captures/` | `data/captures_from_jetson/` | +| `yolo_dataset/` | `data/yolo_dataset/` | + +**Or pull individually:** +```bash +# Evaluation dataset only +rsync -avz rgen@192.168.8.181:~/Documents/Visual_Inspection_ws/evaluation/ \ + /home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws/eval_dataset/ + +# YOLO training data only +rsync -avz rgen@192.168.8.181:~/Documents/Visual_Inspection_ws/yolo_dataset/ \ + /home/dinethra/Jetson_orin_nano/data/yolo_dataset/ +``` + +--- + +## 9. TROUBLESHOOTING + +### Cameras not found / "camera in use" +```bash +# Find what process is holding the camera +fuser /dev/video0 /dev/video1 /dev/video2 /dev/video3 + +# Kill the blocking process(es) +kill -9 + +# Or kill all known pipeline processes at once +pkill -9 -f "ibvs_headed\|ibvs_headless\|ibvs_pipeline\|camera_node\|collect_yolo" +``` + +### Arduino / servo not responding +```bash +# Check Arduino is connected +ls /dev/ttyACM0 /dev/arduino + +# Grant access +sudo chmod 666 /dev/ttyACM0 +sudo chmod 666 /dev/arduino + +# Test serial communication +python3 ~/Documents/Visual_Inspection_ws/test_scripts/04_test_arduino_serial.py +``` + +### ROS2 build after code changes +```bash +# Always build WITHOUT venv active +deactivate +cd ~/Documents/Visual_Inspection_ws/inspection_ws +colcon build --packages-select visual_inspection_ros +source install/setup.bash +``` + +### Check if ROS2 nodes are running +```bash +ros2 node list +# Expected: +# /camera_node +# /servo_node +# /ibvs_action_server +``` + +### Jetson performance mode (max speed) +```bash +sudo nvpmodel -m 0 # max power mode +sudo jetson_clocks # lock clocks to max diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/JETSON_WORKSPACE_OVERVIEW.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/JETSON_WORKSPACE_OVERVIEW.md new file mode 100644 index 00000000..0b84179b --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/JETSON_WORKSPACE_OVERVIEW.md @@ -0,0 +1,137 @@ +# Jetson Workspace Overview +*Last updated: 2026-04-17* + +--- + +## Connection Details +- **Jetson IP**: `rgen@192.168.8.181` +- **SSH**: `ssh rgen@192.168.8.181` +- **SCP**: `sshpass -p 'ffddffdd' scp rgen@192.168.8.181:` +- **Jetson user home**: `/home/rgen/` + +--- + +## Laptop (Dev Machine) → Jetson Folder Mapping + +| Laptop (`/home/dinethra/Jetson_orin_nano/`) | Jetson (`/home/rgen/Documents/Visual_Inspection_ws/`) | +|---|---| +| `inspection_ws/` | `inspection_ws/` (ROS2 workspace — the active one) | +| `jetson_workspace/` | Root of `Visual_Inspection_ws/` (standalone scripts) | +| `weights/` | `weights/` | +| `tools/` | `tools/` | +| `test_scripts/` | `test_scripts/` | +| `tests/` | `tests/` | + +--- + +## Jetson Side Structure (`~/Documents/Visual_Inspection_ws/`) + +### 🔴 ROOT (Standalone — older style, still used) +``` +Visual_Inspection_ws/ +├── ibvs_pipeline.py ← Main IBVS standalone script +├── ibvs_headed.py ← With display +├── ibvs_headless.py ← No display (production) +├── calibration_config.py ← Insta360 pan/tilt calibration polynomials +├── test_calibration_live.py ← Live calibration test +├── export_onnx.py ← Export .pt → .onnx +├── export_trt.py ← Export .onnx → .engine +├── run.sh ← Quick run script +├── QUICK_REFERENCE.md ← Command cheatsheet +├── SETUP.md ← Full setup guide +├── requirements.txt +├── venv/ ← Python virtualenv (activate before running standalone) +├── config/ +│ └── logitech_intrinsics.yaml +├── tests/ ← Test scripts (standalone) +├── test_scripts/ ← More test scripts +├── tools/ ← Utility scripts +├── captures/ ← Captured images +├── docs/ ← Documentation +├── evaluation/ ← Evaluation scripts +├── yolo_dataset/ ← Training dataset +└── arduino/ ← Arduino firmware +``` + +### 🟢 WEIGHTS (shared by both systems) +``` +weights/ +├── yolo11n.pt ← YOLO11n original model +├── yolo11n.onnx ← YOLO11n ONNX export +├── yolo11n.engine ← YOLO11n TensorRT engine ✅ (in use by ROS2) +├── yolov26s.pt ← NEW custom trained model (just transferred) +├── yolov26s.onnx ← ONNX export of new model +└── yolov26s.engine ← TensorRT engine of new model ✅ +``` + +### 🔵 INSPECTION_WS (ROS2 — the active/clean workspace) +``` +inspection_ws/ +├── visual_inspection_ros/ ← Main ROS2 package +│ ├── ibvs_action_server.py ← Main pipeline (IBVS + YOLO + MQTT) +│ ├── camera_node/ ← Camera publishers +│ ├── servo_node/ ← Servo control +│ └── action/ ← ROS2 action definitions +├── visual_inspection_interfaces/ ← Custom ROS2 message types +├── build/ ← colcon build output +├── install/ ← colcon install output +├── log/ ← ROS2 logs +├── pipeline/ ← Pipeline config +├── mqtt/ ← MQTT config +├── behaviour_tree/ ← BT nodes +├── docs/ ← ROS2 workspace docs +└── tests/ ← ROS2 tests +``` + +--- + +## Engine Path Used in ROS2 Pipeline +```python +# ibvs_action_server.py line 100: +ENGINE_PATH = '~/Documents/Visual_Inspection_ws/weights/yolo11n.engine' +``` +To switch to new model, change to `weights/yolov26s.engine` + +--- + +## How to Run on Jetson + +### Standalone (no ROS2) +```bash +cd ~/Documents/Visual_Inspection_ws +source venv/bin/activate +python3 ibvs_pipeline.py +``` + +### ROS2 Pipeline +```bash +# Terminal 1 — cameras +ros2 run visual_inspection_ros camera_node + +# Terminal 2 — servos +ros2 run visual_inspection_ros servo_node + +# Terminal 3 — main IBVS action server +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash +ros2 run visual_inspection_ros ibvs_action_server +``` + +### Rebuild ROS2 after code changes +```bash +cd ~/Documents/Visual_Inspection_ws/inspection_ws +colcon build --packages-select visual_inspection_ros +source install/setup.bash +``` + +--- + +## Sync from Laptop to Jetson +```bash +# Send a specific file +sshpass -p 'ffddffdd' scp rgen@192.168.8.181: + +# Example: sync ibvs_action_server.py +sshpass -p 'ffddffdd' scp \ + /home/dinethra/Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/ibvs_action_server.py \ + rgen@192.168.8.181:~/Documents/Visual_Inspection_ws/inspection_ws/visual_inspection_ros/visual_inspection_ros/ibvs_action_server.py +``` diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK3.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK3.md new file mode 100644 index 00000000..efd310ce --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK3.md @@ -0,0 +1,239 @@ +# Weekly Progress Report - Visual Inspection System (Week 3) +# Period: Feb 17 – Feb 24, 2026 + +--- + +## Slide 1: Last Week Recap - Full Pipeline on Jetson + IBVS Working + +○ Completed full two-stage pipeline integration on Jetson Orin Nano (JetPack 6.1) + +○ Both cameras (Insta360 + Logitech C920) and Arduino servo controller connected and verified + +○ IBVS pipeline running end-to-end: COARSE stage (Insta360 → servo move) + FINE stage (Logitech IBVS centering) + +○ Initial frame rate: only **~3 FPS** — too slow for real-time visual servoing + +○ Identified 4 optimization targets: GPU inference, camera input, servo latency, bounding box drift + +**Script for Slide 1:** +"Last week I completed the full pipeline integration on the Jetson Orin Nano. Both stages are working — the Insta360 detects the object, moves the servo to point the Logitech camera, then IBVS kicks in for precise centering. The problem was performance — we were only getting about 3 frames per second. That's because YOLO was running on CPU. I also noticed a bounding box drift issue in the Y-axis direction. This week I solved both of these problems." + +--- + +## Slide 2: Phase 1 — TensorRT GPU Acceleration (10× Speedup) + +○ Identified Jetson as running **JetPack 6.1** (R36.4.3) with TensorRT 10.3.0 pre-installed + +○ Exported YOLO11n model to **TensorRT FP16 engine** format: + - Command: `model.export(format='engine', device=0, half=True, imgsz=640)` + - Export time: ~7 minutes on Jetson GPU + - Engine size: 8.4 MB (vs 5.4 MB PyTorch weights) + +○ Result: **3 FPS → 30 FPS** (10× improvement) + +○ Pipeline auto-selects TensorRT engine if available, falls back to PyTorch if not + +``` +Before (PyTorch CPU): ~3 FPS +After (TensorRT FP16): ~30 FPS ← 10× faster +``` + +*(Insert terminal screenshot showing [FPS] 30.0 fps output)* + +**Script for Slide 2:** +"The first optimization was TensorRT GPU acceleration. Jetson has NVIDIA's TensorRT already installed as part of JetPack. I exported the YOLO model to TensorRT FP16 format — this took about 7 minutes on the Jetson GPU but you only do it once. The result was dramatic: from 3 frames per second to 30 frames per second. That's 10 times faster, just by using the GPU properly. The pipeline automatically detects if the TensorRT engine file exists and uses it; otherwise it falls back to the standard PyTorch model." + +--- + +## Slide 3: Phase 2 — Reliable Device Detection with udev Rules + +○ **Problem discovered:** Plugging in the Logitech F710 joystick shifted USB device numbers — cameras and Arduino would randomly change from `/dev/video0` to `/dev/video2` etc., breaking the pipeline + +○ **Root cause:** Linux enumerates USB devices in connection order — any new device shifts existing numbering + +○ **Solution: udev rules** — permanent symlinks regardless of USB device order: + +``` +/dev/insta360 → always points to Insta360 X3 (VID=2e1a, PID=00c1) +/dev/logitech → always points to Logitech C920 (VID=046d, PID=08e5) +/dev/arduino → always points to Arduino Uno (VID=2341) +``` + +○ **3-Layer Camera Detection** (in code, at startup): + 1. udev symlink (fastest — fixed path) + 2. USB Vendor ID scan via sysfs (reliable fallback) + 3. Name-based search (original fallback) + +○ **Arduino auto-detection** (`find_arduino()`) scans all `/dev/ttyACM*` ports by USB vendor ID — works regardless of joystick connection + +*(Insert diagram: USB device numbering before vs after udev rules)* + +**Script for Slide 3:** +"We had a reliability problem. Every time the joystick was connected, the USB device numbers would shift and the pipeline would fail to find the cameras. This is a fundamental Linux behavior — devices are numbered in the order they connect. The solution was udev rules: permanent symbolic links that always point to the right device regardless of what else is plugged in. Now /dev/insta360 always points to the Insta360, /dev/logitech always points to the Logitech, and /dev/arduino always points to the Arduino. No matter what USB devices are connected. In the code, I also added a 3-layer detection: first try the udev symlink, then scan by USB vendor ID, then fall back to name search. And the Arduino is also auto-detected by its USB vendor ID at startup." + +--- + +## Slide 4: Headless vs Headed Modes + Run Scripts + +○ Implemented two operating modes for the pipeline: + +| Feature | Headed Mode | Headless Mode | +|---------|-------------|---------------| +| Camera window | ✅ Live dual-camera view (via VNC) | ❌ No window | +| FPS in terminal | ✅ Every second | ✅ Every second | +| Speed | Slightly slower (drawing overhead) | 🔥 Maximum speed | +| Use case | Debugging, tuning | Deployment, production | + +○ Created two separate scripts: + - `ibvs_headed.py` — for debugging with VNC camera view + - `ibvs_headless.py` — for production (maximum performance) + +○ Created `run.sh` launcher that automatically: + - Kills any stuck previous instances (releases cameras + serial port) + - Sets `DISPLAY=:1` for VNC + - Enables max Jetson performance (`nvpmodel -m 0`, `jetson_clocks`) + - Grants serial port permissions + +○ **FPS meaning:** + - COARSE mode: ~30 FPS (full pipeline speed with TensorRT) + - FINE mode: ~15-20 FPS (includes servo settling delay — this is correct behavior) + +**Script for Slide 4:** +"I also added two operating modes. Headed mode shows a live camera window with both cameras side by side — useful for debugging and tuning. Headless mode skips all display operations for maximum speed — this is what we'd use for production deployment. In both modes, FPS is printed to the terminal every second so you always know performance. You'll notice the FPS drops in FINE mode compared to COARSE — that's expected and correct. In FINE mode we have to wait after each servo command for the physical servo to settle before reading the next frame. If we don't wait, we'd be correcting based on an outdated position." + +--- + +## Slide 5: Phase 4 — Y-Axis Bounding Box Drift Fix + +○ **Problem:** Bounding box appeared shifted downward from the actual object position in the Logitech IBVS view + +○ **Root cause — coordinate space mismatch:** + +``` +YOLO ran on: frame_logi (640 × 480 pixels) +Results drawn on: frame_logi_disp (640 × 360 pixels) + ↑ + DIFFERENT HEIGHT — no scaling! +``` + +○ **Effect:** Object at y=300 in 480p space drawn at y=300 in 360p frame → shifted 75px too low + - The worse the object position (lower in frame), the larger the visual shift + +○ **Fix applied:** + 1. Run YOLO on `frame_logi_display` (640×360) — all coordinates in display space, no conversion needed + 2. Scale optical center cy from 480p → 360p: `cy = 257.50 × (360/480) = 193.1` + 3. All display drawing coordinates now exactly match what you see + +○ IBVS error computation also updated to use 360p-space cy → more accurate error signal + +*(Insert before/after screenshots showing bbox position correction)* + +**Script for Slide 5:** +"This was an important bug to fix. The bounding box was appearing lower than the actual object — this is called a coordinate space mismatch. YOLO was running on the full 480 pixel height frame, but the results were being drawn on a 360 pixel display frame without any scaling. So coordinates from the 480p world were being plotted incorrectly on the 360p display. The fix was to run YOLO directly on the display-sized frame. Everything is now in the same coordinate space. I also had to scale the camera's principal point cy proportionally — from the calibrated 480p value down to the 360p equivalent. This also improved the IBVS error signal accuracy." + +--- + +## Slide 6: Current System Performance Summary + +``` +┌────────────────────────────────────────────────────────┐ +│ OPTIMIZATION RESULTS │ +├──────────────────┬──────────────┬──────────────────────┤ +│ Metric │ Before │ After │ +├──────────────────┼──────────────┼──────────────────────┤ +│ FPS (COARSE) │ ~3 FPS │ ~30 FPS (10×) │ +│ FPS (FINE/IBVS) │ ~3 FPS │ ~15-20 FPS │ +│ YOLO Backend │ PyTorch CPU │ TensorRT GPU FP16 │ +│ Camera Detection │ Fixed index │ udev symlinks + VID │ +│ BBox Y drift │ ~50-75px off │ Fixed (360p space) │ +│ Device stability │ Breaks w/USB │ Stable always │ +└──────────────────┴──────────────┴──────────────────────┘ +``` + +○ Pipeline running reliably on Jetson Orin Nano at ~20 FPS during IBVS + +○ Both fire extinguisher and gauge centering confirmed working + +○ Code versioned on GitHub with full documentation + +*(Insert video of full pipeline running — COARSE → FINE centering)* + +**Script for Slide 6:** +"Here's the summary of this week's results. We went from 3 FPS to 30 FPS. Camera and Arduino detection is now reliable regardless of what other USB devices are connected. The bounding box drift is fixed. The pipeline runs stably and can be started with a single command. Both fire extinguisher and gauge detection and centering are confirmed working." + +--- + +## Slide 7: Next Week + +○ **Phase 3: PID Tuning** — Systematic gain tuning for faster IBVS convergence + - Current convergence: 3-8 seconds with oscillation + - Target: <3 seconds smooth convergence + - Method: Tune Kp, Ki, Kd individually per axis + +○ **Phase 3: Arduino C++ PID offload** — Move PID computation from Jetson Python to Arduino C++ + - Benefit: Servo commands sent at ~1000 Hz instead of 15-20 Hz + - Expected improvement: Faster, smoother servo response in FINE mode + +○ **Final validation dataset** — Collect performance data with optimized system + - Convergence time measurement + - Accuracy measurement (pixels from center at convergence) + - Range testing (1m, 2m, 3m, 4m, 5m) + +○ **System evaluation** — Full end-to-end benchmark on real inspection objects + +**Script for Slide 7:** +"For next week, there are two main goals. First, PID tuning — the IBVS converges but it takes 3 to 8 seconds and sometimes oscillates. I want to get that below 3 seconds with smooth movement. Second, I want to offload the PID computation to the Arduino. Right now, every servo command goes through Python on Jetson — that's slow. If the Arduino runs the PID itself, servo updates happen much faster. I also plan to run the full validation test — measuring convergence time and accuracy at different distances — so we have solid performance numbers for the evaluation." + +--- + +## Technical Explanations (For Your Study) + +**1. Why TensorRT is So Much Faster:** +- PyTorch runs at full 32-bit float precision on CPU → slow on embedded hardware +- TensorRT FP16 compiles the neural network specifically for the Jetson GPU +- It fuses layers, eliminates redundant operations, and uses GPU tensor cores +- Result: same accuracy, 10× the speed + +**2. Why USB Device Numbers Shift:** +- Linux assigns /dev/video0, /dev/video1, etc. in the order devices connect at boot +- If you add a joystick, it might get video0 and push cameras to video1, video2, etc. +- udev rules use hardware identifiers (USB Vendor ID + Product ID) — these never change +- So the symlink always points to the right physical device + +**3. What is USB Vendor ID?** +- Every USB device has a 4-digit Vendor ID (VID) and Product ID (PID) burned into hardware +- Insta360: VID=2e1a, PID=00c1 +- Logitech C920: VID=046d, PID=08e5 +- Arduino Uno: VID=2341 +- These IDs never change regardless of which USB port or what order you plug devices in + +**4. Headed vs Headless — Why Headless is Faster:** +- cv2.imshow requires compositing, memory copies, and display rendering +- These add ~2-3ms per frame (not much, but significant at 30 FPS) +- More importantly, headless mode skips all bounding box DRAWING operations +- The YOLO inference and servo control logic are identical in both modes + +**5. The Coordinate Space Bug Explained:** +``` +480p frame (y=0 to 480): + Object at y=350 — that's 73% down the frame + +360p display (y=0 to 360): + 73% down would be y=262 + But the code drew it at y=350 (out of 360!) → clipped/shifted way down + +Fix: Run YOLO on 360p frame directly → coordinates are already correct +``` + +**6. Why cy Must Be Scaled:** +- Kalibr calibration was done on the 640×480 Logitech image → cy=257.50 +- That means the optical axis crosses at y≈257 in a 480-pixel-tall image +- In a 360-pixel-tall image, that same physical optical axis is at: 257.50 × (360/480) = 193.1 +- Using the wrong cy means the IBVS thinks the image center is lower than it is → incorrect error signal + +**7. IBVS FPS in FINE Mode:** +- FINE mode runs at ~15-20 FPS (not 30 FPS) +- `time.sleep(IBVS_SERVO_DELAY)` is called after every servo command +- This wait is REQUIRED — servo needs time to physically move before the next frame is useful +- Reading a frame before servo settles would give a stale position → wrong correction → oscillation +- This is correct behavior, not a bug diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK4.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK4.md new file mode 100644 index 00000000..23ebb3b0 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK4.md @@ -0,0 +1,289 @@ +# Weekly Progress Report - Visual Inspection System (Week 4) +# Period: 4 March – 10 March 2026 + +--- + +## Slide 1: Last Week Recap — ROS2 Conversion This Week + +○ Previous weeks completed: TensorRT acceleration, two-stage IBVS pipeline, udev camera detection — all working and benchmarked + +○ **This week's focus: ROS2 conversion** — wrapping the working pipeline into ROS2 nodes so Behaviour Tree integration can happen + +○ All items from PLAN.md now completed: + +| Plan Item | Status | +|-----------|--------| +| camera_node.py — both cameras published | ✅ Done | +| servo_node.py — Arduino control | ✅ Done | +| ibvs_action_server.py — IBVS as Action Server | ✅ Done | +| ByteTrack multi-object integration | ✅ Done | +| 4-image capture per object | ✅ Done | +| MQTT publisher (images + metadata) | ✅ Done | +| Front/back zone detection + object_in_back signal | ✅ Done | +| BT leaf node classes provided | ✅ Done | +| README for BT person | ✅ Done | + +**Script for Slide 1:** +"You already know the IBVS pipeline from last week — TensorRT at 30 FPS, two-stage centering, reliable camera detection. This week I converted all of that into ROS2. The reason is the Behaviour Tree integration — the navigation team needs standard ROS2 interfaces to call our inspection system. Every item on the plan we created for this phase is now done." + +--- + +## Slide 2: ROS2 Nodes Built + Topics + +**Three nodes created — each with a clear single responsibility:** + +``` +Node 1: camera_node + Publishes → /visual_inspection/insta360/image_raw (sensor_msgs/Image, 30 Hz) + → /visual_inspection/logitech/image_raw (sensor_msgs/Image, 30 Hz) + +Node 2: servo_node + Subscribes → /servo/pan_tilt (std_msgs/Int16MultiArray: [tilt, pan]) + Writes → Arduino serial (/dev/arduino) → physical pan-tilt servos + +Node 3: ibvs_action_server + Action → /visual_inspection/inspect_objects (InspectObjects action) + Publishes → /visual_inspection/debug (combined side-by-side camera view) + → /visual_inspection/status (IDLE/DETECTING/COARSE/IBVS/CAPTURING) + → /visual_inspection/ibvs_error (live pixel error: x, y, magnitude) + → /visual_inspection/detections (JSON: objects, class, track_id, zone) +``` + +**Camera access method:** OpenCV `cv2.VideoCapture` with Linux V4L2 backend +- Camera nodes subscribe to `/insta360/image_raw` and `/logitech/image_raw` +- Initially had difficulty opening cameras inside ROS2 nodes — solved by reusing the previously implemented camera access method (V4L2 with udev symlinks) adapted into a ROS2 publisher node + +*(Insert: RViz2 screenshot showing /visual_inspection/debug topic — side-by-side camera feed with bounding boxes)* + +**Script for Slide 2:** +"Three nodes, each doing one job. The camera node reads both cameras and publishes as standard ROS2 image topics. It uses V4L2 through OpenCV — we had previously implemented a reliable camera detection method using udev symlinks, so I adapted that approach into the ROS2 publisher. The servo node is just a bridge — it receives ROS2 messages and forwards them to Arduino over serial. The IBVS action server is the main node — it contains the entire inspection pipeline and communicates with the BT via a ROS2 Action. There are also 4 status topics so any external monitor or BT node can observe what's happening in real time." + +--- + +## Slide 3: Full Inspection Pipeline — Demo + +**Inspection flow:** + +``` +DETECTING (Insta360, up to 20s) + → ByteTrack assigns stable IDs to each object + → Split: cy < 200px = FRONT objects | cy > 200px = BACK objects + +COARSE (per front object) + → Polynomial formula: (cx_insta, cy_insta) → (pan_angle, tilt_angle) + → Servo moves to rough position (2s settle) + +IBVS (per front object, up to 40s) + → YOLO on Logitech → PID loop until error < 10px + → Feedback to BT: ibvs_error_px every tick + +CAPTURE (after 10s autofocus wait) + → 4 Logitech ROI images + 1 Insta360 overview image + → MQTT publish to ThingsBoard + save locally +``` + +*(Insert: terminal screenshot showing IBVS error reducing → from ~150px to 8.4px)* + +*(Insert: RViz2 /visual_inspection/debug screenshot — bounding box with class name, ID, IBVS arrow, mode overlay)* + +*(Insert: full inspection run video clip — servo moving, IBVS converging, capture)* + +**Test script (replaces BT until integration):** +```bash +python3 test_scripts/test_full_pipeline.py +# Output: +# [IBVS] obj=1 IBVS err= 8.4px ............................................. +# RESULT: success=True | objects_found=1 | objects_inspected=1 +# [BT] -> SUCCESS: All front objects inspected. +``` + +**Script for Slide 3:** +"This is the full pipeline running end to end. You can see in the terminal screenshot that the IBVS error starts around 150 pixels when the object first appears on the Logitech camera, and reduces down to around 8 pixels over several seconds — that's the PID loop doing its job. The RViz2 screenshot shows the combined debug view — left side is Insta360 with the front-back boundary line visible, right side is Logitech with the bounding box centred and the blue IBVS arrow showing the correction direction. The mode overlay shows IBVS, then switches to CAPTURING when converged. Since we don't have the BT connected yet, I built a test script that runs the same goal-feedback-result flow that the BT would use." + +--- + +## Slide 4: Key Technical Features Added + +**1. ByteTrack Multi-Object Tracking:** +- `model.track(persist=True, tracker='bytetrack.yaml')` from Ultralytics +- Each detected object gets a stable integer ID across frames (ID38 stays ID38) +- Multiple same-class objects (e.g., 3 fire extinguishers) processed in consistent track ID order +- Label shown on bounding box: `[fire_extinguisher] ID38 0.82` + +**2. Front / Back Zone Detection:** +- Insta360 equirectangular output: top half = FRONT robot direction, bottom half = BACK +- Threshold at y=200px — objects above are FRONT (green boxes), below are BACK (orange boxes) +- If all objects in BACK: result returns `object_in_back=True, failed_reason="all_in_back"` +- BT rotates robot 180° and calls inspection again + +**3. BT Leaf Nodes Provided (`bt_nodes/inspection_bt_nodes.py`):** +```python +from visual_inspection_ros.bt_nodes.inspection_bt_nodes import ( + InspectObjectsAction, # wraps full pipeline as py_trees Action + CaptureOverviewAction, # quick 360° snapshot for VLM (no IBVS) + CheckObjectInBack, # Condition: reads object_in_back from blackboard +) +``` + +**BT Decision Table:** +| `success` | `object_in_back` | `failed_reason` | BT Action | +|-----------|-----------------|-----------------|-----------| +| True | False | `""` | All done, continue | +| True/False | True | `""`/`"all_in_back"` | Rotate 180° → retry | +| False | False | `"no_detection"` | Move to next position | +| False | False | `"ibvs_timeout"` | Log alert, skip | + +**Script for Slide 4:** +"Three key technical features. ByteTrack gives us stable object IDs so we can handle multiple objects in one scene without confusion — the system always processes them in the same order. The front-back zone detection means the system can tell the BT when objects are behind the robot. And the BT leaf node classes mean Ravith can get the full inspection working with essentially a few lines of Python — import three classes, place them in the tree, done. The CaptureOverview node is also new — it lets the BT quickly grab a 360-degree snapshot for the VLM pipeline without moving the servos at all." + +--- + +## Slide 5: MQTT Integration + Dataset Captures + +**ThingsBoard MQTT working:** +- Broker: `demo.thingsboard.io:1883` +- Auth: device access token (ThingsBoard standard: username=token, password=empty) +- Each image sent as JSON with: session ID, class_name, object_id, base64 JPEG + +*(Insert: ThingsBoard screenshot showing "inspection" device Latest Telemetry with class_name, message, session, timestamp keys)* + +**Local capture folder (all images kept for dataset — `KEEP_LOCAL=True`):** +``` +captures/ +├── inspection/ ← Full IBVS captures (YOLO ROI + overview) +│ └── 20260309_120000/ ← Session = inspection timestamp +│ └── fire_extinguisher/ ← Auto-labelled from YOLO class name +│ └── instance_1/ +│ ├── img_01.jpg ← Logitech (after 10s autofocus wait) +│ ├── img_02.jpg +│ ├── img_03.jpg +│ ├── img_04.jpg +│ └── overview_01.jpg ← Insta360 with YOLO boxes +│ +└── overview/ ← Insta360-only snapshots for VLM + └── 20260309_121000/ + ├── gauge_room_A_01.jpg + └── gauge_room_A_02.jpg +``` + +**Initial dataset collected this week:** + +*(Insert: screenshot of captures/ folder showing collected images)* + +*(Insert: sample captured ROI images — gauge, fire extinguisher, door — showing image quality after 10s autofocus)* + +- Images auto-labelled by YOLO class → folder is already structured for training +- 10-second autofocus wait added → significantly sharper gauge images vs without wait + +**Script for Slide 5:** +"MQTT is working — you can see the ThingsBoard screenshot showing telemetry arriving from our device. Class name, session ID, and timestamp keys are visible. Each image is base64 encoded into a JSON payload. We also started collecting an initial dataset this week using the pipeline itself. The capture folder structure automatically labels images by YOLO class name — so every image already knows if it's a gauge, fire extinguisher, or door without any manual labeling. The 10-second autofocus wait before capture makes a meaningful difference for gauge images — the text on the gauge is actually readable in the captured images." + +--- + +## Slide 6: Blockers Encountered + Solutions + +**Blocker 1: Camera access inside ROS2 nodes** +- Problem: Initial approach to open cameras directly inside ROS2 nodes failed — device access conflicts +- Solution: **Reused previously implemented camera access method** (V4L2 via OpenCV with udev symlinks) and adapted it into a ROS2 publisher node. Separate camera_node publishes topics → ibvs_action_server only subscribes +- Result: Clean separation, cameras reliably accessible + +**Blocker 2: `ValueError: too many values to unpack` — IBVS crash** +- Problem: `_detect_insta()` updated to return 4 values but one call inside `_ibvs()` still expected 3 → exception silently caught by action framework → `failed_reason=""` with status=ABORTED +- How found: Checked Terminal 3 (action server log) for traceback — error visible there +- Fix: `_, _, insta_dbg = ...` → `_, _, _, insta_dbg = ...` (one underscore added) + +**Blocker 3: `UnboundLocalError: front_dets referenced before assignment`** +- Problem: During refactoring to add `overview_only` mode, accidentally deleted the `_search_insta()` call from the full inspection path +- Fix: Restored `front_dets, back_dets = self._search_insta(goal_handle)` in the correct location + +**Blocker 4: MQTT images not arriving on ThingsBoard** +- Root cause: `paho-mqtt` installed in `.venv` Python only — ROS2 action server uses **system Python** → `import paho` silently failed (caught by `except Exception`) +- Fix: `pip3 install paho-mqtt` on system Python (without activating venv) +- Lesson: ROS2 nodes always use system Python, never the venv + +**Blocker 5: `colcon build` fails with `ModuleNotFoundError: No module named 'em'`** +- Root cause: Building with `.venv` active overrides system Python used by `rosidl_adapter` +- Fix: Never activate `.venv` before `colcon build` — build only runs on Jetson, never laptop + +**Blocker 6: paho-mqtt 1.x `wait_for_publish(timeout=5)` crashes** +- Problem: ThingsBoard paho on Jetson is version 1.x — `timeout` argument not available +- Fix: Added try/except fallback → `result.wait_for_publish()` (no timeout arg for 1.x) + +**Script for Slide 6:** +"Six blockers this week — I'll go through the most educational ones. The camera access issue was about separation of concerns. Rather than fighting with direct camera access inside the action server, I used the camera architecture we previously implemented — a separate publisher node that opens the cameras, and the action server just subscribes. This is actually better design anyway. The MQTT silence was the trickiest — the action server was failing silently because paho-mqtt wasn't installed for the system Python. All my testing with the venv worked fine, but ROS2 runs on system Python. From now on any library that a ROS2 node needs must be installed with system pip3." + +--- + +## Slide 7: Next Week — Dataset Collection + Evaluation + +**Goal:** Collect structured dataset for all 3 YOLO classes using the pipeline + +| Class | Target images | Method | +|-------|--------------|--------| +| `gauge` | 100+ | Various distances, angles, lighting conditions | +| `fire_extinguisher` | 100+ | Various backgrounds, partial occlusion cases | +| `door` | 50+ | Different door types, open/closed, indoor lighting | + +**Collection method:** +1. Terminal 1-3: start ROS2 nodes +2. Terminal 4: run `test_full_pipeline.py` — pipeline automatically captures, labels, saves to `captures/inspection/` +3. Images already labeled by class → ready for YOLO fine-tuning after collection + +**Evaluation plan:** +- Measure IBVS convergence time at different distances (1m, 2m, 3m) +- Measure pixel error at convergence (target: < 10px) +- Measure capture quality score (can a human read the gauge? 1-5) +- Test front/back detection with objects in back zone → confirm BT signal works +- Test multi-object scenario (2-3 objects in same scene) + +**BT integration (with Ravith):** +- Share `inspection_bt_nodes.py` + README instructions +- Agree on blackboard key names +- Integration test: BT calls action → front object → success → BT calls again → back object → rotate 180° → retry + +**Script for Slide 7:** +"Next week has two parallel tasks. I'll be systematically collecting data using the pipeline — the folder structure already handles the labeling automatically so I just run the pipeline in front of each object type and it saves labeled images. The evaluation work will measure convergence time at different distances, error at convergence, and image quality for gauge readability. In parallel, Ravith will start connecting our BT leaf nodes into the main tree. The key test we want to do together is the back-side scenario — put an object behind the robot, confirm the system signals object_in_back=True, confirm the BT rotation happens, then inspect again. That end-to-end test will validate the full integration." + +--- + +## Technical Explanations (for your notes) + +**1. Why ROS2 Action (not Topic or Service)?** +- Topic: fire-and-forget, no response → can't get result or feedback +- Service: synchronous, one response only, no streaming → BT hangs for 60 seconds waiting +- Action: long-running, streaming feedback, structured result, fully cancellable → perfect for inspection + +**2. Why camera_node is separate from ibvs_action_server:** +- Cameras must publish at 30 Hz continuously even when no inspection is running +- If cameras were inside the action server, they'd only run during a goal +- Separation also allows other nodes (e.g., a monitoring dashboard) to subscribe to camera feeds independently + +**3. How ByteTrack handles multiple objects:** +- Frame N: detects objects → assigns IDs using IoU matching with previous frame +- High confidence detections: tracked immediately +- Low confidence: buffered, given ID only if reappears consistently +- Result: even if same-class objects move slightly between frames, IDs stay stable + +**4. Why YOLO class name = folder name = MQTT tag:** +- The YOLO model already knows what it detected: `results.names[int(box.cls[0])]` +- No extra classification step needed — YOLO output IS the label +- Folder: `captures/inspection/SESSION/fire_extinguisher/instance_1/img_01.jpg` +- MQTT payload: `"class_name": "fire_extinguisher"` +- Everything consistently labeled from the single YOLO detection step + +**5. Front/Back zone — equirectangular projection:** +- Insta360 outputs a flat equirectangular image of the 360° sphere +- Physical interpretation: top rows of image = forward/front direction, bottom rows = backward/back +- A gauge at y=100 (in a 480px tall image) is physically in front of the robot +- A fire extinguisher at y=350 is physically behind the robot +- This lets us make a front/back decision using just a y-threshold with no 3D geometry + +**6. ThingsBoard MQTT — why base64 for images:** +- ThingsBoard telemetry is designed for scalar values (temperature, pressure, etc.) +- Images don't have a native MQTT type — base64 encoding converts binary image to a JSON-safe string +- Limitation: ThingsBoard stores it as a string value, not a viewable image +- Future: use ThingsBoard's HTTP API to upload images as proper attachments, or use a dedicated image server + +--- + +*Report prepared by: Dinethra | 2026-03-10* diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK5.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK5.md new file mode 100644 index 00000000..4bcfd758 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/docs/WEEKLY_REPORT_WEEK5.md @@ -0,0 +1,262 @@ +# Weekly Progress Report — Visual Inspection System (Week 5) +# Period: 11 March – 17 March 2026 + +--- + +## Slide 1: Last Week Recap + This Week Focus + +**Last week completed (Week 4):** +- Full ROS2 conversion done — camera_node, servo_node, ibvs_action_server +- MQTT integration working with ThingsBoard +- Behaviour Tree leaf nodes provided to Ravith +- Pipeline running end-to-end + +**This week's focus: Dataset Collection + Evaluation** + +| Task | Status | +|------|--------| +| Research evaluation methods for this type of pipeline | ✅ Done | +| Design flexible data collection script | ✅ Done | +| Collect evaluation dataset (angle, distance, occlusion, reference) | ✅ Partial | +| Identify & fix YOLO detection degradation after ROS conversion | ✅ Root cause found | +| Collect YOLO retraining dataset (fire ext, door, gauge) | ✅ Started | + +**Script for Slide 1:** +"Last week we had the full ROS2 pipeline working. This week the focus shifted to evaluation — but before collecting data, I first did research on what evaluation methods are appropriate for a system like this, then built the tooling to actually collect the data. We hit some blockers along the way which I'll go through." + +--- + +## Slide 2: Evaluation Method Research + +**Key question:** How do you evaluate a visual inspection pipeline for a research paper? + +The system has three separate pipelines — each needs different metrics: + +| Pipeline | What to measure | Metrics | +|----------|----------------|---------| +| **ROI Capture** (Jetson) | Does it reliably center and capture the object? | IBVS convergence time, final pixel error, convergence rate, FPS | +| **Gauge Reading** (Server) | Is the analog reading numerically accurate? | MAE, RMSE, R², reading error distribution | +| **VLM Decision** (Server) | Are PASS/FAIL decisions correct? | Accuracy, Precision/Recall, F1, BERTScore, LLM-as-judge | + +**Robustness test conditions designed:** + +| Variable | Levels tested | +|----------|--------------| +| Horizontal angle | 0°, 15°L, 30°L, 15°R, 30°R, 45°L, 45°R | +| Distance | 0.75m, 1m, 1.25m, 1.5m, 2m, 3m, 4m | +| Occlusion | 0%, 25%, 50%, 75% | +| Multi-object | 1, 2, 3 objects in scene | + +**Key insight:** For a publication, minimum 50 samples per condition are needed +(30 is CLT minimum but outliers skew results — 50 gives robust mean ± std and 95% CI) + +*(Insert: screenshot of evaluation_plan.md showing the full metric breakdown)* + +**Script for Slide 2:** +"Before collecting any data I researched what evaluation methods are appropriate for each part of this system — because the three pipelines need completely different metrics. + +For the **ROI capture pipeline** — the part on Jetson — we measure IBVS centering performance: convergence time in seconds, final pixel error at convergence (target under 10 pixels), and convergence rate as a percentage of successful runs. On top of that, image quality metrics: SSIM and PSNR tell us spatial fidelity, VIF tells us perceived visual quality from a human perspective, and AlexNet cosine similarity compares the captured image to a reference to see if the captured content matches what was expected. + +For the **gauge reading pipeline** on the server — we compute MAE and RMSE between the predicted reading and the true physical value, plus R-squared to show how well the model tracks across the full range of gauge values. + +For the **VLM decision pipeline** — we use classification metrics: precision, recall, F1 for the PASS/FAIL decisions, BERTScore to evaluate how semantically close the VLM's text output is to a ground-truth caption, and LLM-as-judge where a second language model scores the output for correctness. + +The test conditions are: 7 distances from 0.75m to 4m, 7 horizontal angles, 4 occlusion levels, and multi-object scenes. We are starting with reference baseline and distance evaluation first — because those do not require YOLO to work at angles, and YOLO currently has a detection quality issue at close range which we are fixing through retraining. Once retraining is done, all test conditions will be unlocked." + +--- + +## Slide 3: Flexible Data Collection Script + +**Problem with previous approach:** No structured way to vary angle, distance, occlusion — had to manually move things and remember to log everything. + +**Solution built: `collect_dataset.py`** + +``` +Main menu: + 1. Angle evaluation → enter exact angle + direction + 2. Distance evaluation → enter exact measured distance + 3. Occlusion testing → enter exact % occlusion applied + 4. Reference baseline → clean unoccluded captures + 5. VLM / Gauge special sessions + +For each capture: + 1. User enters: angle/distance/occlusion value + how many images + 2. Script runs full pipeline automatically (camera → YOLO → IBVS → capture) + 3. Automatically detects new img_01.jpg + 4. Automatically parses IBVS convergence time and final error from terminal output + 5. Auto-copies image to correct evaluation folder + 6. Logs everything to capture_log.csv (timestamp, angle, distance, occlusion, IBVS stats) +``` + +**What's automated (no manual steps):** +- Running the pipeline +- Detecting new captured image +- Copying to correct folder +- Logging IBVS convergence time and error + +*(Insert: screenshot of script running — menu + progress output)* + +**Script for Slide 3:** +"The data collection script handles everything automatically. You enter the experimental condition — the angle you measured, the distance, the occlusion percentage — then press enter. The script runs the full pipeline, waits for it to complete, pulls out the IBVS stats from the output, copies the image to the right folder, and logs everything to a CSV. There's no manual file copying or data entry — which means less errors and faster collection." + +--- + +## Slide 4: Blocker — Joystick Control Didn't Work + +**Plan:** Use a USB joystick to control the pan-tilt mechanism during data collection (move servo to desired angle → capture) + +**What happened:** +- Joystick detection was unreliable (SDL2 / pygame device enumeration issues on Jetson) +- Even when detected, analog stick values drifted — servos twitched instead of holding position +- Wasted time debugging hardware compatibility + +**Solution:** Replaced joystick with **keyboard arrow key control** in the YOLO dataset collection script + +```python +# In collect_yolo_dataset.py — cv2.waitKey() captures arrow keys: +cv2.waitKey(30) & 0xFF +# Left/Right arrow → pan servo (+/- step degrees) +# Up/Down arrow → tilt servo (+/- step degrees) +# SPACE → capture both cameras simultaneously +# +/- → adjust step size (1°–15° per keypress) +# h → home (90°, 90°) +# c → change class +``` + +**Result:** More reliable than joystick — step size is controllable, no drift, works over SSH + +**Script for Slide 4:** +"The original plan was to use a joystick for fine servo control during data collection. That didn't work — the joystick detection on Jetson was unreliable and even when it worked the stick drifted and caused servo twitching. I replaced it with keyboard control using OpenCV's waitKey — arrow keys move the servos in configurable steps, space bar captures both cameras simultaneously. It's actually more controllable than a joystick because you can set the step size and each press is exactly that many degrees." + +--- + +## Slide 5: Dataset Collection — What We Got + +**Evaluation dataset collected (`evaluation/`):** + +| Folder | Images | Notes | +|--------|--------|-------| +| `reference/` | 2 images | Clean baseline — fire extinguisher, head-on, 1m | +| `distance_eval/0.75m/` | 2 images | Close range | +| `distance_eval/1m/` | 2 images | Standard range | +| `distance_eval/1.25m/` | 1 image | | +| `distance_eval/1.5m/` | 1 image | | + +**Observation on captured images:** +- ROI images are **clear and well-centred** — IBVS working correctly at 1-2m +- Image quality sufficient for VLM evaluation at these distances +- Angle evaluation not yet completed (blocked by YOLO issue — see next slide) + +**YOLO training dataset collected (`yolo_dataset/`):** +- Classes: `fire_extinguisher`, `door`, `gauge` +- Cameras: Insta360 + Logitech captured simultaneously per class +- Purpose: Re-train YOLO model to improve detection robustness + +*(Insert: sample ROI capture showing clean centered fire extinguisher)* + +**Script for Slide 5:** +"We started collecting the evaluation dataset and the images look good — the IBVS centering is working and the ROI captures are clear. However we hit a significant blocker with YOLO detection that prevented completing the angle evaluation. I'll explain that on the next slide. In parallel I also set up a separate dataset collection for YOLO retraining — capturing both cameras simultaneously for three classes." + +--- + +## Slide 6: Blocker — YOLO Detection Degraded After ROS Conversion + +**Symptom:** After converting to ROS2, YOLO detection on the Logitech camera was producing: +- Double bounding boxes (body + nozzle detected separately) +- False positive: red fire safety cabinet on wall detected as fire extinguisher +- Lower confidence scores than before ROS conversion +- Detection failing at close range / changing lighting + +**Before ROS conversion (direct Python):** Clean single tight bounding box at all tested conditions + +**Root cause found:** + +```python +# Original ibvs_pipeline.py (line 603-605): +frame_logi_display = cv2.resize(frame_logi, (640, 360)) # ← RESIZED +results_logi = model(frame_logi_display, ...) # ← YOLO on 640x360 + +# ROS2 ibvs_action_server.py (before fix): +frame = self._get_logi() # 640x480 raw frame +results = self.model(frame) # ← YOLO on 640x480 ← MISMATCH +``` + +**The TensorRT engine was compiled for 640×360 input.** +Feeding it 640×480 caused letterboxing artifacts → degraded confidence scores → false positives pass through. + +**Fix applied:** +```python +# _detect_raw() now resizes before YOLO, scales coords back after: +YOLO_INPUT_H = 360 +if h_orig != self.YOLO_INPUT_H: + yolo_frame = cv2.resize(frame, (w_orig, self.YOLO_INPUT_H)) + scale_y = h_orig / self.YOLO_INPUT_H # 480/360 = 1.333 +results = self.model(yolo_frame, ...) +# Then scale y-coords back × 1.333 for correct IBVS pixel error +``` + +*(Insert: before/after comparison — multiple noisy boxes vs clean single box)* + +**Script for Slide 6:** +"This was the most important blocker of the week. YOLO detection that was working perfectly before ROS conversion was producing garbage results after. It took some investigation to find the root cause — the TensorRT engine was compiled for 640 by 360 input, but the ROS camera node was publishing at 640 by 480, and nobody was resizing before inference. The fix is a single resize in the detection function — we resize down, run YOLO to get coordinates in 360 space, then scale the coordinates back up to 480 space for the IBVS pixel error calculation." + +--- + +## Slide 7: Current Status + Next Week Plan + +**What's working now:** +- ✅ YOLO detection fix applied (resize 640×480 → 640×360 before TRT inference) +- ✅ Reference dataset captured and ready for evaluation +- ✅ YOLO retraining dataset collection started (fire_ext, door, gauge) +- ✅ Evaluation folder structure in place, CSV logging working + +**What we can do in parallel now:** + +| Task | Status | Who | +|------|--------|-----| +| Run evaluation on reference dataset (IBVS + image quality metrics) | **Can start now** | Dinethra | +| Complete angle / distance / occlusion data collection | Next session | Dinethra | +| YOLO retraining (label collected data → fine-tune) | After more data | Dinethra | +| BT integration testing with Ravith | Continues | Ravith + Dinethra | + +**Next week targets:** +- Complete distance evaluation (7 distances × 50 images each) +- Complete angle evaluation (7 angles × 50 images each) +- Run image quality metrics (SSIM, PSNR, VIF) on collected reference images +- Send labelled YOLO training data → begin fine-tuning run +- IBVS convergence stats: mean, std, 95% CI per condition + +**Script for Slide 7:** +"The plan for next week is parallel — while more data collection continues, we can already start running the evaluation metrics on the reference data we have. The evaluation scripts are ready. For YOLO retraining, once we have enough images labeled (targeting 200+ per class), we'll run a fine-tuning run and benchmark improvement. The goal is to have all angle and distance evaluation data collected by end of next week so we can start statistical analysis." + +--- + +## Technical Notes (for your reference) + +**Why TRT engine input size matters:** +- TensorRT compiles a fixed computation graph optimized for one specific input shape +- Feeding a different shape: engine internally letterboxes/pads → different feature map alignment → lower detection quality +- Always export TRT engine at the same resolution used during inference + +**Why 50 samples minimum per condition:** +- 30 samples satisfies the Central Limit Theorem (distribution of sample mean → normal) +- But with 30 samples, a single outlier shifts the mean by ~3% +- With 50 samples, one outlier = ~2% shift — reportable with ±std error bars in paper +- For a conference paper table: report N, mean ± std, min, max, 95% CI + +**Evaluation file locations:** +``` +Local machine: + Evaluation_V_I_ws/eval_dataset/ ← pulled from Jetson evaluation/ + data/captures_from_jetson/ ← pulled from Jetson captures/ + data/yolo_dataset/ ← pulled from Jetson yolo_dataset/ + +Jetson: + ~/Documents/Visual_Inspection_ws/evaluation/ + ~/Documents/Visual_Inspection_ws/captures/ + ~/Documents/Visual_Inspection_ws/yolo_dataset/ +``` + +--- + +*Report prepared by: Dinethra | 2026-03-17* diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/PLAN.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/PLAN.md new file mode 100644 index 00000000..bfc5cfca --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/PLAN.md @@ -0,0 +1,272 @@ +# Visual Inspection — ROS2 Integration + Behaviour Tree Plan + +**Stack:** ROS2 Humble + py_trees_ros + Python +**Existing:** Working ibvs_pipeline.py on Jetson (DO NOT break this) +**Goal:** Wrap existing pipeline into ROS2 nodes → connect to main BT + +--- + +## 📁 Current Jetson Workspace (flat, working) + +``` +~/Documents/Visual_Inspection_ws/ +├── venv/ ← existing, keep +├── weights/yolo11n.engine ← existing TRT engine, keep +├── ibvs_pipeline.py ← existing working pipeline, keep +├── ibvs_headless.py ← existing launcher, keep +├── ibvs_headed.py ← existing launcher, keep +├── calibration_config.py ← existing, keep +├── config/ ← existing camera calibration, keep +└── inspection_ws/ ← NEW folder SCP'd from laptop + └── (our new ROS2 code) +``` + +**Rule:** Never modify existing files. New ROS2 nodes import from them. + +--- + +## ✅ Step 0 (DONE) — Python Pipeline Working + +- ✅ Insta360 YOLO detection (TensorRT FP16, ~30 FPS) +- ✅ COARSE positioning (cubic formula → servo) +- ✅ FINE centering (IBVS, <10px error) +- ✅ Camera auto-detection (udev symlinks) +- ✅ Arduino auto-detection (USB vendor ID) + +--- + +## 🔄 Step 1 — ROS2 Conversion (DO THIS FIRST) + +Convert the working Python pipeline into ROS2 nodes so the BT person can connect. + +### 1a. ROS2 Package Structure + +``` +inspection_ws/ +├── README.md ← for your friend +├── PLAN.md ← this file +├── setup_ros.sh ← install script for Jetson +│ +└── visual_inspection_ros/ ← ROS2 package + ├── package.xml + ├── setup.py + ├── setup.cfg + ├── resource/ + │ └── visual_inspection_ros + ├── action/ + │ └── InspectObjects.action ← custom ROS2 action definition + ├── msg/ + │ └── InspectionResult.msg ← custom message + └── visual_inspection_ros/ + ├── __init__.py + ├── camera_node.py ← Node 1: publish camera feeds + ├── servo_node.py ← Node 2: Arduino servo control + ├── ibvs_action_server.py ← Node 3: IBVS as Action Server + ├── inspection_node.py ← Node 4: full inspection coordinator + └── bt_nodes/ + ├── __init__.py + ├── detect_bt_node.py ← BT node: Insta360 detect + ├── coarse_bt_node.py ← BT node: coarse position + ├── ibvs_bt_node.py ← BT node: IBVS center + ├── capture_bt_node.py ← BT node: capture 4 images + └── mqtt_bt_node.py ← BT node: publish to MQTT +``` + +--- + +### 1b. ROS2 Topics and Actions We Expose + +**Topics (published by us):** + +| Topic | Type | Description | +|-------|------|-------------| +| `/visual_inspection/insta360/image_raw` | `sensor_msgs/Image` | Insta360 camera feed | +| `/visual_inspection/logitech/image_raw` | `sensor_msgs/Image` | Logitech camera feed | +| `/visual_inspection/status` | `std_msgs/String` | Pipeline status (IDLE/COARSE/FINE/CAPTURING) | +| `/visual_inspection/ibvs_error` | `geometry_msgs/Point` | Current IBVS pixel error (x, y) | +| `/visual_inspection/detections` | `std_msgs/String` | JSON of detected objects | + +**Topics (subscribed by us):** + +| Topic | Type | Description | +|-------|------|-------------| +| `/servo/pan_tilt` | `std_msgs/Int16MultiArray` | Direct servo command [tilt, pan] | + +**Actions (we are the server, BT calls us):** + +``` +Action: /visual_inspection/inspect_objects +Type: visual_inspection_ros/action/InspectObjects + +Goal: + int32 max_objects # 0 = inspect all detected, N = inspect only N + bool return_home # true = servo returns 90,90 after done + +Result: + bool success + int32 objects_inspected + string failed_reason # "no_detection" / "ibvs_timeout" / "mqtt_error" / "" + +Feedback: + string current_step # "detecting" / "coarse" / "ibvs" / "capturing" / "publishing" + int32 current_object # which object number currently processing + float ibvs_error_px # current pixel error +``` + +--- + +### 1c. The 4 ROS2 Nodes + +#### Node 1: `camera_node.py` +``` +Name: /visual_inspection/camera_publisher +Publishes: + /visual_inspection/insta360/image_raw + /visual_inspection/logitech/image_raw +Frequency: ~30 Hz +Uses: cv2.VideoCapture with existing udev symlinks (/dev/insta360, /dev/logitech) +``` + +#### Node 2: `servo_node.py` +``` +Name: /visual_inspection/servo_controller +Subscribes: /servo/pan_tilt [tilt, pan] as Int16MultiArray +Action: writes to Arduino serial (/dev/arduino) +Uses: existing find_arduino() from ibvs_pipeline.py +``` + +#### Node 3: `ibvs_action_server.py` +``` +Name: /visual_inspection/ibvs_server +Action Server: /visual_inspection/inspect_objects +When called: + 1. Runs YOLO on Insta360 frame (using existing TRT model) + 2. COARSE: cubic formula → publishes to /servo/pan_tilt + 3. FINE: IBVS loop → publishes to /servo/pan_tilt + 4. CAPTURE: 4 images → publishes via MQTT + 5. Sends result back to BT +Publishes feedback: step-by-step progress +``` + +#### Node 4: `inspection_node.py` (optional orchestrator) +``` +Name: /visual_inspection/inspection_coordinator +Manages: multi-object queue, ByteTracker IDs, inspected list +``` + +--- + +## 🌳 Step 2 — Behaviour Tree Integration (py_trees_ros) + +**Your friend builds the main BT. You provide BT leaf nodes.** + +### BT Nodes We Provide + +Your friend adds these leaf nodes to the main tree: + +```python +# They import from our package: +from visual_inspection_ros.bt_nodes import ( + DetectObjectsNode, # Condition: checks if YOLO sees objects + InspectObjectsNode, # Action: runs full inspection (wraps ROS2 Action) +) +``` + +### What Goes Inside the BT (our section) + +``` +[BT Section: Visual Inspection] ← your friend places this in the main tree +│ +└── Sequence + ├── Condition: IsRobotStationary ← from navigation BT (not us) + │ + ├── Action: InspectObjects ← OUR action node + │ (internally does COARSE+IBVS+CAPTURE+MQTT for all objects) + │ SUCCESS → BT continues + │ FAILURE(no_detection) → BT handles (rotate robot & retry) + │ FAILURE(ibvs_timeout) → BT handles (skip or alert) + │ + └── Action: PublishInspectionSummary ← OUR node (optional) +``` + +### Our BT Leaf Node Classes + +```python +# detect_bt_node.py +class DetectObjectsNode(py_trees.behaviour.Behaviour): + """Condition: Returns SUCCESS if YOLO detects objects on Insta360""" + # Stores detected objects in BT Blackboard for next nodes + +# ibvs_bt_node.py +class InspectObjectsNode(py_trees_ros.action_clients.Fromaction): + """Action: Calls /visual_inspection/inspect_objects ROS2 action""" + # Wraps ROS2 action as py_trees node + # Handles RUNNING/SUCCESS/FAILURE states +``` + +--- + +## 📋 Implementation Order + +``` +Week 1: ROS2 Conversion +├── [ ] Create ROS2 package (visual_inspection_ros) +├── [ ] camera_node.py — publish both cameras as ROS2 topics +├── [ ] servo_node.py — Arduino control via ROS2 topic +├── [ ] ibvs_action_server.py — IBVS as ROS2 Action Server +└── [ ] Test: ros2 action send_goal /visual_inspection/inspect_objects ... + +Week 2: Multi-Object + Capture +├── [ ] ByteTracker integration (Ultralytics model.track() with persist=True) +├── [ ] 4-image capture with IBVS re-centering between captures +├── [ ] Save images + metadata JSON to capture/ folder +└── [ ] Test multiple object scenario + +Week 3: MQTT +├── [ ] MQTTPublisher class (paho-mqtt, already in existing venv) +├── [ ] Publish 4 images + metadata per object +├── [ ] Connect to broker (need broker IP from team) +└── [ ] Test end-to-end image delivery + +Week 4: BT Nodes +├── [ ] DetectObjectsNode (py_trees Condition) +├── [ ] InspectObjectsNode (py_trees_ros Action client) +├── [ ] Share node classes with BT person +└── [ ] Integration test with main BT +``` + +--- + +## 🛠️ Setup on Jetson + +```bash +# 1. Source existing venv (has all dependencies) +source ~/Documents/Visual_Inspection_ws/venv/bin/activate + +# 2. Source ROS2 Humble +source /opt/ros/humble/setup.bash + +# 3. Build our package +cd ~/Documents/Visual_Inspection_ws/inspection_ws +colcon build --packages-select visual_inspection_ros +source install/setup.bash + +# 4. Run nodes +ros2 run visual_inspection_ros camera_node +ros2 run visual_inspection_ros ibvs_action_server + +# 5. Test action from terminal (instead of BT) +ros2 action send_goal /visual_inspection/inspect_objects \ + visual_inspection_ros/action/InspectObjects \ + "{max_objects: 0, return_home: true}" +``` + +--- + +## ❓ Open Questions (confirm with team) + +1. **MQTT broker IP/hostname** — needed for mqtt_bt_node.py +2. **Gazebo** — for simulation testing? which robot model? +3. **Shared BT blackboard keys** — what key names does main BT use for robot state? +4. **py_trees_ros version** — `pip install py_trees_ros` or ROS apt package? +5. **How to trigger us** — does BT call our action, or does main BT read our topic? diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/README.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/README.md index ceb78f96..1878653e 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/README.md +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/README.md @@ -1,6 +1,7 @@ # Visual Inspection System — ROS2 Package -**Stack:** ROS2 Humble · Python 3.10 · YOLO11 TensorRT · ByteTrack · paho-mqtt · py_trees +**Stack:** ROS2 Humble · Python 3.10 · YOLOv26s TensorRT · ByteTrack · paho-mqtt · py_trees **Hardware:** Jetson Orin Nano · Insta360 camera · Logitech camera · Arduino (pan-tilt servos) +*Updated: 2026-04-20 — Migrated from ROS2 Action → ROS2 Service* --- @@ -30,26 +31,25 @@ ros2 run visual_inspection_ros servo_node ``` ✅ Ready when you see: `Servo node ready` -### Terminal 3 — IBVS Action Server (main pipeline) +### Terminal 3 — Inspection Service (main pipeline) ```bash source /opt/ros/humble/setup.bash source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash -ros2 run visual_inspection_ros ibvs_action_server +ros2 run visual_inspection_ros inspection_service ``` ✅ Ready when you see: ``` -Action server ready at /visual_inspection/inspect_objects -MQTT broker: demo.thingsboard.io:1883 +Inspection service ready: /visual_inspection/inspect ``` -### Terminal 4 — Test (replaces Behaviour Tree until BT is connected) +### Terminal 4 — Test (simulates Behaviour Tree) ```bash source /opt/ros/humble/setup.bash source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash -python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_full_pipeline.py +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object fire_extinguisher ``` -### Terminal 5 — RViz2 (live debug view) +### Terminal 5 — RViz2 (optional debug view) ```bash source /opt/ros/humble/setup.bash rviz2 @@ -65,9 +65,11 @@ inspection_ws/ ├── README.md ← this file ├── PLAN.md ← original development plan │ -├── visual_inspection_interfaces/ ← ROS2 custom message/action package -│ └── action/ -│ └── InspectObjects.action ← action definition (see below) +├── visual_inspection_interfaces/ ← ROS2 custom message/service/action package +│ ├── action/ +│ │ └── InspectObjects.action ← legacy action (kept) +│ └── srv/ +│ └── Inspect.srv ← ✅ ACTIVE service definition │ └── visual_inspection_ros/ ← ROS2 node package ├── package.xml @@ -75,61 +77,70 @@ inspection_ws/ └── visual_inspection_ros/ ├── camera_node.py ← Node 1: publish both cameras ├── servo_node.py ← Node 2: Arduino servo control - ├── ibvs_action_server.py ← Node 3: full inspection pipeline + ├── ibvs_action_server.py ← Node 3: legacy action server (base class) + ├── inspection_service.py ← Node 4: ✅ ACTIVE service server └── bt_nodes/ - ├── __init__.py - └── inspection_bt_nodes.py ← BT leaf nodes (for Ravith) + └── inspection_bt_nodes.py ← BT leaf nodes ``` --- -## Action Interface — InspectObjects.action +## Service Interface — Inspect.srv ``` -# ── GOAL (what you send to start inspection) ───────────────────────────────── -int32 max_objects # 0 = inspect all detected front objects, N = first N only -bool return_home # true = servos return to 90,90 after done -string location_label # label from BT e.g. "gauge_room_A" / "unknown" -bool overview_only # true = just capture Insta360 snapshots (no IBVS) -int32 overview_count # how many Insta360 images (used when overview_only=true) - -# ── RESULT (what you get back) ──────────────────────────────────────────────── -bool success -int32 objects_inspected # how many objects were fully centered + captured -int32 objects_found # total detected on Insta360 (front + back) -bool object_in_back # true → BT must rotate robot 180° and retry -string failed_reason # "": success | "no_detection" | "ibvs_timeout" | - # "logi_no_detection" | "all_in_back" - -# ── FEEDBACK (streamed while running) ──────────────────────────────────────── -string current_step # "detecting" / "coarse" / "ibvs" / "capturing" / "overview" -int32 current_object # which object is being processed -float32 ibvs_error_px # live pixel error during IBVS centering +# ── REQUEST ─────────────────────────────────────────────────────────────── +string target_object # "fire_extinguisher" | "door" | "person" | "gauge" + # "unknown" | "main_cylinder" → overview-only + # "" or "any" → detect all classes +string location_label # Location from BT e.g. "engine_room_A" +int32 max_objects # 0 = inspect all, N = first N +bool return_home # true = servo returns to 90°,90° when done + +# ── RESPONSE ────────────────────────────────────────────────────────────── +bool success # true if ≥1 object inspected +string status # "ok" | "no_detection" | "ibvs_timeout" | + # "all_in_back" | "no_frames" | "busy" +int32 objects_found # total detected on Insta360 +int32 objects_inspected # successfully centred + captured +bool object_in_back # true → BT must rotate robot 180° and retry +string[] image_paths # absolute paths to all saved images +string info # human-readable summary ``` +### YOLO Model Classes (yolov26s.engine) + +| ID | YOLO Name | BT `target_object` | Confidence | +|---|---|---|---| +| 0 | `door` | `"door"` | 0.5 | +| 1 | `extinguisher` | `"fire_extinguisher"`, `"extinguisher"` | 0.5 | +| 2 | `gauge` | `"gauge"`, `"pressure_gauge"` | **0.3** | +| 3 | `person` | `"person"`, `"people"` | 0.5 | + +*`unknown` and `main_cylinder` → overview-only (not YOLO classes)* + --- ## All ROS2 Topics -### Published by us +### Published | Topic | Type | Description | |-------|------|-------------| | `/visual_inspection/insta360/image_raw` | `sensor_msgs/Image` | Insta360 raw feed (360°) | | `/visual_inspection/logitech/image_raw` | `sensor_msgs/Image` | Logitech raw feed (close-up) | -| `/visual_inspection/debug` | `sensor_msgs/Image` | **Combined debug view** — side-by-side Insta360 + Logitech with bounding boxes, class names, IBVS arrow, mode, FPS | -| `/visual_inspection/status` | `std_msgs/String` | Current mode: `IDLE` / `DETECTING` / `COARSE` / `IBVS` / `CAPTURING` / `OVERVIEW` | -| `/visual_inspection/ibvs_error` | `geometry_msgs/Point` | IBVS pixel error: x=ex, y=ey, z=total magnitude | -| `/visual_inspection/detections` | `std_msgs/String` | JSON list of detected objects with class, confidence, track_id, front/back zone | +| `/visual_inspection/debug` | `sensor_msgs/Image` | Combined debug view (optional) | +| `/visual_inspection/status` | `std_msgs/String` | `IDLE`/`DETECTING`/`COARSE`/`IBVS`/`CAPTURING`/`OVERVIEW`/`SWEEP` | +| `/visual_inspection/ibvs_error` | `geometry_msgs/Point` | IBVS pixel error | +| `/visual_inspection/detections` | `std_msgs/String` | JSON detected objects | -### Subscribed by us +### Subscribed | Topic | Type | Description | |-------|------|-------------| -| `/servo/pan_tilt` | `std_msgs/Int16MultiArray` | `[tilt, pan]` servo command (0-180°) | +| `/servo/pan_tilt` | `std_msgs/Int16MultiArray` | `[tilt, pan]` servo command | -### Actions -| Action | Type | Description | +### Services (active) +| Service | Type | Description | |--------|------|-------------| -| `/visual_inspection/inspect_objects` | `InspectObjects` | Full pipeline: detect → coarse → IBVS → capture → MQTT | +| `/visual_inspection/inspect` | `Inspect.srv` | ✅ Full pipeline — request/response | --- @@ -239,29 +250,38 @@ ls -lR ~/Documents/Visual_Inspection_ws/captures/ ## Test Commands (Without Behaviour Tree) -### Full inspection (most common test) ```bash -python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_full_pipeline.py -``` +source /opt/ros/humble/setup.bash +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash -### Single manual goal (terminal) -```bash -ros2 action send_goal /visual_inspection/inspect_objects \ - visual_inspection_interfaces/action/InspectObjects \ - "{max_objects: 0, return_home: true, location_label: 'unknown', overview_only: false, overview_count: 2}" -``` +# 1. Fire extinguisher +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object fire_extinguisher -### Overview-only mode (BT requests 360 snapshot for VLM) -```bash -ros2 action send_goal /visual_inspection/inspect_objects \ - visual_inspection_interfaces/action/InspectObjects \ - "{max_objects: 0, return_home: false, location_label: 'gauge_room_A', overview_only: true, overview_count: 2}" +# 2. Door +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object door + +# 3. Person +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object person + +# 4. Gauge (with sweep fallback) +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object gauge + +# 5. Unknown (overview only) +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object unknown + +# 6. Main cylinder (overview only) +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object main_cylinder + +# 7. All classes +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py ``` -### Run the BT tree directly +### Check service manually (ros2 CLI) ```bash -pip install py_trees # first time only -ros2 run visual_inspection_ros run_inspection_bt +ros2 service list +ros2 service call /visual_inspection/inspect \ + visual_inspection_interfaces/srv/Inspect \ + "{target_object: 'gauge', location_label: 'test', max_objects: 0, return_home: true}" ``` --- @@ -398,37 +418,39 @@ Login → Devices → "inspection" → Latest Telemetry --- -## Configuration — Key Constants (ibvs_action_server.py) +## Configuration — Key Constants (inspection_service.py / ibvs_action_server.py) | Constant | Default | Description | |----------|---------|-------------| -| `INSTA_SEARCH_TIMEOUT` | 20s | Max wait for object detection on Insta360 | -| `IBVS_TOTAL_TIMEOUT` | 40s | Max time for IBVS centering per object | -| `IBVS_TOL_PX` | 10px | Error threshold to consider "centered" | -| `FOCUS_WAIT` | 10s | Autofocus wait after IBVS before capture | -| `IMAGES_PER_OBJ` | 4 | Logitech ROI images per object | -| `FRONT_Y_MAX` | 200px | Insta360 y-threshold: above=BACK, below=FRONT | -| `TILT_REVERSED` | True | Flip tilt direction (hardware-specific) | -| `KEEP_LOCAL` | True | Always keep images locally (dataset mode) | -| `CONF_INSTA` | 0.5 | YOLO confidence for Insta360 detection | -| `CONF_IBVS` | 0.3 | YOLO confidence for Logitech IBVS | +| `INSTA_SEARCH_TIMEOUT` | 20s | Max wait for object on Insta360 | +| `GAUGE_INSTA_TIMEOUT` | 8s | Gauge-specific shorter timeout before sweep | +| `IBVS_TOTAL_TIMEOUT` | 40s | Max IBVS centering time | +| `IBVS_TOL_PX` | 10px | Centred threshold | +| `FOCUS_WAIT` | 10s | Autofocus wait before capture | +| `IMAGES_PER_OBJ` | 3 | Logitech ROI images per object | +| `FRONT_Y_MAX` | 200px | Insta360 front/back split line | +| `TILT_REVERSED` | True | Flip tilt (hardware-specific) | +| `KEEP_LOCAL` | True | Always keep images locally | +| `CLASS_CONF['gauge']` | 0.3 | Lower threshold for gauge | +| `CLASS_CONF['*']` | 0.5 | Threshold for all other classes | +| `SWEEP_TILTS` | [20,50,80] | Gauge sweep tilt positions | +| `SWEEP_PAN_RANGE` | (20,160) | Gauge sweep pan range | --- ## Rebuild After Code Changes ```bash -# Only needed when InspectObjects.action file is changed: +# Full rebuild (after changing .srv or .action files): cd ~/Documents/Visual_Inspection_ws/inspection_ws -rm -rf build/visual_inspection_interfaces install/visual_inspection_interfaces source /opt/ros/humble/setup.bash colcon build --packages-select visual_inspection_interfaces visual_inspection_ros source install/setup.bash -# For Python file changes only (ibvs_action_server.py etc.) — NO rebuild needed: -# Just SCP the file from laptop and restart the node -pkill -f ibvs_action_server && sleep 1 -ros2 run visual_inspection_ros ibvs_action_server +# Python-only changes (inspection_service.py, ibvs_action_server.py): +# Just SCP the file and restart — no rebuild needed +pkill -f inspection_service && sleep 1 +ros2 run visual_inspection_ros inspection_service ``` --- @@ -437,26 +459,24 @@ ros2 run visual_inspection_ros ibvs_action_server | Problem | Fix | |---------|-----| -| `ImportError: cannot import name 'InspectObjects'` | Run `colcon build` and `source install/setup.bash` | -| `ValueError: too many values to unpack` | SCP latest ibvs_action_server.py | -| `ModuleNotFoundError: No module named 'em'` | Don't activate `.venv` before `colcon build` on laptop | -| No camera feed | Check `/dev/insta360` and `/dev/logitech` exist: `ls /dev/insta360` | -| Servo not moving | Check Arduino connected: `ls /dev/ttyUSB*` | -| IBVS not converging (timeout 40s) | Move object closer, check Logitech camera focused | -| MQTT no telemetry on ThingsBoard | Check `mqtt_config.yaml` access token; check MQTT broker reachable | -| `failed_reason: ""` with status=6 | Crash in execute_callback — check Terminal 3 for traceback | -| Images blurry | Increase `FOCUS_WAIT` (currently 10s) in ibvs_action_server.py | +| `cannot import name 'Inspect'` | Run `colcon build` and `source install/setup.bash` | +| `Service not available` | Check `inspection_service` node is running | +| `No module named 'torch'` | Run `ln -sf ~/Documents/Visual_Inspection_ws/venv/lib/python3.10/site-packages/torch ~/.local/lib/python3.10/site-packages/torch` | +| No camera feed | Check `/dev/insta360` and `/dev/logitech` exist | +| Servo not moving | Check Arduino: `ls /dev/ttyUSB*` | +| IBVS timeout | Move object closer, check Logitech focus | +| Gauge never found | Sweep scan runs automatically — check logs for `SWEEP` | +| Images blurry | Increase `FOCUS_WAIT` (currently 10s) | --- ## YOLO Model -- **Path:** `~/Documents/Visual_Inspection_ws/weights/yolo11n.engine` -- **Format:** TensorRT FP16 (accelerated on Jetson GPU) ← loads first -- **Fallback:** `.pt` PyTorch file if `.engine` not found -- **Classes detected:** door, gauge, fire_extinguisher (trained classes) -- **Tracking:** ByteTrack (`model.track(persist=True, tracker='bytetrack.yaml')`) +- **Path:** `~/Documents/Visual_Inspection_ws/weights/yolov26s.engine` +- **Format:** TensorRT FP16 (Jetson GPU) +- **Classes:** `door`(0), `extinguisher`(1), `gauge`(2), `person`(3) +- **Tracking:** ByteTrack --- -*Last updated: 2026-03-09* +*Last updated: 2026-04-20* diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/CMakeLists.txt b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/CMakeLists.txt index 88aad9e4..62e084f6 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/CMakeLists.txt +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/CMakeLists.txt @@ -6,6 +6,8 @@ find_package(rosidl_default_generators REQUIRED) rosidl_generate_interfaces(${PROJECT_NAME} "action/InspectObjects.action" + "srv/Inspect.srv" + "srv/UploadImages.srv" ) ament_export_dependencies(rosidl_default_runtime) diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/action/InspectObjects.action b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/action/InspectObjects.action index ce3ecd65..2b9f6445 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/action/InspectObjects.action +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/action/InspectObjects.action @@ -8,6 +8,9 @@ string location_label # label for this location (from BT: "gauge_room_A" / "un bool overview_only # true = just capture Insta360 overview, skip IBVS entirely # BT uses this when it wants a 360 snapshot for VLM int32 overview_count # how many Insta360 images to capture (used when overview_only=true, default 2) +string target_object # object class to inspect — filters YOLO to this class only + # BT sends: "fire_extinguisher" | "door" | "person" | "gauge" + # "" or "any" = detect all classes (default behaviour) --- diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/srv/Inspect.srv b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/srv/Inspect.srv new file mode 100644 index 00000000..8c7c17d8 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/srv/Inspect.srv @@ -0,0 +1,33 @@ +# Inspect.srv — ROS2 Service interface for visual inspection pipeline +# +# BT calls this like an API endpoint: +# Request → what to inspect and where +# Response ← what was found, image paths, status +# +# YOLO model classes: {0:'door', 1:'extinguisher', 2:'gauge', 3:'person'} +# Special classes: 'unknown', 'main_cylinder' → overview-only (no IBVS) + +# ── REQUEST ────────────────────────────────────────────────────────────────── +string target_object # Object to inspect: + # "fire_extinguisher" | "extinguisher" + # "door" | "person" | "gauge" + # "unknown" | "main_cylinder" → Insta360+Logitech overview only + # "" or "any" → detect all classes +string location_label # Location identifier from BT (e.g. "engine_room_A") + # Used in folder names and metadata +int32 max_objects # 0 = inspect all found objects, N = first N only +bool return_home # true = servo returns to 90°,90° when done + +--- + +# ── RESPONSE ───────────────────────────────────────────────────────────────── +bool success # true if at least one object was inspected +string status # "ok" | "no_detection" | "ibvs_timeout" | + # "all_in_back" | "no_frames" | "busy" | "error" +int32 objects_found # total objects detected (Insta360) +int32 objects_inspected # objects successfully centred + captured +bool object_in_back # true → BT should rotate robot 180° and retry +string[] image_paths # absolute paths to all saved images (Logitech + Insta360) + # Images are kept permanently for evaluation +string info # human-readable summary, e.g.: + # "gauge #1(conf=0.92) gauge #2(conf=0.78) — 6 images saved" diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/srv/UploadImages.srv b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/srv/UploadImages.srv new file mode 100644 index 00000000..aba61c09 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_interfaces/srv/UploadImages.srv @@ -0,0 +1,10 @@ +# UploadImages.srv +# BT calls this after a successful /visual_inspection/inspect +# The Jetson reads the saved files and HTTP-POSTs them to the laptop. + +string[] image_paths # paths returned by /visual_inspection/inspect +string session_label # human label, e.g. "engine_room_A" — used as folder name on laptop +--- +bool success # true if all files uploaded OK +string info # human-readable summary +int32 uploaded_count # how many files actually sent diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/docs/BT_INTEGRATION_GUIDE.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/docs/BT_INTEGRATION_GUIDE.md new file mode 100644 index 00000000..7aece5fe --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/docs/BT_INTEGRATION_GUIDE.md @@ -0,0 +1,434 @@ +# Behaviour Tree Integration Guide +**Visual Inspection System — Jetson Orin Nano** + +--- + +## 1. Overview + +The visual inspection pipeline uses **two ROS2 Services** that the BT calls in sequence: + +| Step | Service | What it does | +|---|---|---| +| 1 | `/visual_inspection/inspect` | Runs full inspection — detects, focuses, captures images, saves to Jetson disk | +| 2 | `/visual_inspection/upload_images` | Reads those saved files, HTTP-POSTs them to your laptop on the same WiFi network | + +> **The BT always calls Step 1 first, then Step 2.** If Step 2 (upload) fails, the BT should retry Step 2 — no need to redo the inspection. + +**IMPORTANT FOR BT DEVELOPERS:** Both are ROS2 **Services** (not Actions). The BT calls each one and simply waits for a response. There is no continuous feedback stream. + +--- + +## 2. Full BT Flow (Inspect → Upload) + +``` +BT navigates robot to waypoint (Nav2) + │ + ▼ +BT calls /visual_inspection/inspect + │ sends: target_object="gauge", location_label="engine_room_A" + │ + ▼ +Jetson runs inspection pipeline: + Insta360 detects object → Coarse servo move → IBVS fine focus → Capture 3 images + Images + metadata.json saved on Jetson disk + │ + ▼ +Response back to BT: + success=True, image_paths=["/path/img_01.jpg", ...], status="ok" + │ + ▼ +BT calls /visual_inspection/upload_images + │ sends: image_paths=[...], session_label="engine_room_A" + │ + ▼ +Jetson reads files, HTTP-POSTs them to laptop at 192.168.8.244:8888 + │ + ▼ +Laptop saves: received_captures/engine_room_A/gauge/instance_1/ + ├── img_01_conf0.85.jpg + ├── img_02_conf0.85.jpg + ├── img_03_conf0.85.jpg + └── metadata.json + │ + ▼ +Response back to BT: + success=True → BT continues to next waypoint + success=False → BT retries upload (no need to re-inspect) +``` + +--- + +## 3. Service 1 — Inspect + +**Service**: `/visual_inspection/inspect` +**Type**: `visual_inspection_interfaces/srv/Inspect` + +### Request (BT → Jetson) + +| Field | Type | What to send | +|---|---|---| +| `target_object` | `string` | What to inspect: `"fire_extinguisher"`, `"door"`, `"person"`, `"gauge"`, `"unknown"` | +| `location_label` | `string` | Location from BT map (e.g. `"engine_room_A"`). Used in folder names for tracking. | +| `max_objects` | `int32` | `0` = inspect all found objects of that type | +| `return_home` | `bool` | `true` = camera resets to center after done | + +### Response (Jetson → BT) + +| Field | Type | What BT receives | +|---|---|---| +| `success` | `bool` | `true` if ≥1 object was captured | +| `status` | `string` | `"ok"` / `"no_detection"` / `"ibvs_timeout"` / `"all_in_back"` / `"no_frames"` / `"busy"` | +| `objects_found` | `int32` | Objects spotted by Insta360 wide camera | +| `objects_inspected` | `int32` | Objects successfully focused + captured by Logitech | +| `object_in_back` | `bool` | If `true` → rotate robot 180° and retry | +| `image_paths` | `string[]` | Absolute paths to saved images on Jetson disk. Pass these to the upload service. | +| `info` | `string` | Human-readable summary | + +### Status Values + +| `status` | What BT should do | +|---|---| +| `"ok"` | Proceed to upload | +| `"no_detection"` | Nothing found → navigate to next waypoint | +| `"ibvs_timeout"` | IBVS didn't converge → move robot closer and retry | +| `"all_in_back"` | Object is behind robot → rotate 180° and retry | +| `"no_frames"` | Camera failure — check hardware | +| `"busy"` | Another inspection running — wait and retry | + +--- + +## 4. Service 2 — Upload Images to Laptop + +**Service**: `/visual_inspection/upload_images` +**Type**: `visual_inspection_interfaces/srv/UploadImages` + +The Jetson reads each file from the paths the BT gives it, then HTTP-POSTs them to your laptop. + +### Request (BT → Jetson) + +| Field | Type | What to send | +|---|---|---| +| `image_paths` | `string[]` | The `image_paths` received from the inspect response | +| `session_label` | `string` | Same `location_label` used in inspect — used as top folder on laptop | + +### Response (Jetson → BT) + +| Field | Type | What BT receives | +|---|---|---| +| `success` | `bool` | `true` if all files uploaded to laptop | +| `info` | `string` | Human-readable result or error message | +| `uploaded_count` | `int32` | How many files were sent | + +### About location_label / session tracking + +The `session_label` you send here (same as `location_label` in inspect) becomes the top-level folder on the laptop: + +``` +received_captures/ +└── engine_room_A/ ← your location_label / session_label + └── gauge/ + └── instance_1/ + ├── img_01_conf0.85.jpg + └── metadata.json ← contains: confidence, ibvs_time_s, final_error_px, etc. +``` + +This is how you track which images came from which location. + +--- + +## 5. Target Object Classes + +| `target_object` | Mode | Pipeline | +|---|---|---| +| `"fire_extinguisher"` | **Full IBVS** | Insta360 detect → coarse pan-tilt → Logitech IBVS → 3 images | +| `"extinguisher"` | **Full IBVS** | Same (alias) | +| `"door"` | **Full IBVS** | Same | +| `"person"` | **Full IBVS** | Same | +| `"gauge"` | **Full IBVS + Sweep** | Same + serpentine sweep if Insta360 misses | +| `"unknown"` | **Overview only** | Servo home → Insta360 raw + 1 Logitech image | +| `"main_cylinder"` | **Overview only** | Same | + +### Confidence Thresholds (per class) + +| Class | Threshold | +|---|---| +| `fire_extinguisher` | > 0.5 | +| `door` | > 0.5 | +| `person` | > 0.6 | +| `gauge` | > 0.3 (visually weaker) | + +--- + +## 6. metadata.json — What is inside each captured folder + +Every `instance_N/` folder contains a `metadata.json` alongside the images: + +```json +{ + "class": "gauge", + "instance_id": 1, + "confidence": 0.8887, + "session": "20260424_120000", + "location_label": "engine_room_A", + "num_images": 3, + "camera": "logitech", + "ibvs_converged": true, + "ibvs_error_px": 7.62, + "ibvs_time_s": 4.626, + "ibvs_iterations": 45, + "ibvs_fps": 9.7, + "coarse_time_s": 2.004, + "initial_ibvs_error_px": 203.06, + "pipeline_time_s": 18.4 +} +``` + +--- + +## 7. C++ BehaviorTree Node Examples (BehaviorTree.CPP) + +### Inspect Node + +```cpp +#include +#include +#include "visual_inspection_interfaces/srv/inspect.hpp" + +class InspectObjectNode : public BT::CoroActionNode +{ +public: + InspectObjectNode(const std::string& name, const BT::NodeConfiguration& config) + : BT::CoroActionNode(name, config) + { + node_ = rclcpp::Node::make_shared("bt_inspect_client"); + client_ = node_->create_client( + "/visual_inspection/inspect"); + } + + static BT::PortsList providedPorts() + { + return { + BT::InputPort("target_object"), + BT::InputPort("location_label"), + BT::OutputPort>("image_paths") // pass to upload node + }; + } + + BT::NodeStatus tick() override + { + std::string target_obj, location; + getInput("target_object", target_obj); + getInput("location_label", location); + + auto req = std::make_shared(); + req->target_object = target_obj; + req->location_label = location; + req->max_objects = 0; + req->return_home = true; + + auto future = client_->async_send_request(req); + while (rclcpp::ok() && + future.wait_for(std::chrono::milliseconds(100)) != std::future_status::ready) { + setStatusRunningAndYield(); + } + + auto res = future.get(); + if (res->success) { + setOutput("image_paths", res->image_paths); // forward to upload node + return BT::NodeStatus::SUCCESS; + } else if (res->object_in_back) { + // BT should rotate robot 180° and retry + return BT::NodeStatus::FAILURE; + } + return BT::NodeStatus::FAILURE; + } + +private: + rclcpp::Node::SharedPtr node_; + rclcpp::Client::SharedPtr client_; +}; +``` + +### Upload Node + +```cpp +#include "visual_inspection_interfaces/srv/upload_images.hpp" + +class UploadImagesNode : public BT::CoroActionNode +{ +public: + UploadImagesNode(const std::string& name, const BT::NodeConfiguration& config) + : BT::CoroActionNode(name, config) + { + node_ = rclcpp::Node::make_shared("bt_upload_client"); + client_ = node_->create_client( + "/visual_inspection/upload_images"); + } + + static BT::PortsList providedPorts() + { + return { + BT::InputPort>("image_paths"), // from inspect node + BT::InputPort("session_label") + }; + } + + BT::NodeStatus tick() override + { + std::vector paths; + std::string label; + getInput("image_paths", paths); + getInput("session_label", label); + + auto req = std::make_shared(); + req->image_paths = paths; + req->session_label = label; + + auto future = client_->async_send_request(req); + while (rclcpp::ok() && + future.wait_for(std::chrono::milliseconds(100)) != std::future_status::ready) { + setStatusRunningAndYield(); + } + + auto res = future.get(); + return res->success ? BT::NodeStatus::SUCCESS : BT::NodeStatus::FAILURE; + } + +private: + rclcpp::Node::SharedPtr node_; + rclcpp::Client::SharedPtr client_; +}; +``` + +--- + +## 8. BT XML Example (Navigate → Inspect → Upload) + +```xml + + + + + + + + + + + + + + + + + + +``` + +> `{captured_paths}` is a BT Blackboard variable — the inspect node writes to it, the upload node reads from it. + +--- + +## 9. Running Everything + +### On Jetson (4 terminals) + +```bash +# ── Paste in EVERY terminal ────────────────────────────────────────────────── +source /opt/ros/humble/setup.bash +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash + +# Terminal 1 — Cameras +ros2 run visual_inspection_ros camera_node + +# Terminal 2 — Servo controller +ros2 run visual_inspection_ros servo_node + +# Terminal 3 — Inspection service (main pipeline) +ros2 run visual_inspection_ros inspection_service + +# Terminal 4 — Image uploader (sends to laptop after inspection) +ros2 run visual_inspection_ros image_uploader +# Laptop IP is already set to 192.168.8.244:8888 +# Override if needed: --ros-args -p laptop_url:=http://:8888/upload +``` + +### On Laptop (1 terminal, connected to YasiruDEX WiFi) + +```bash +pip install flask # only needed once +cd /home/dinethra/Jetson_orin_nano/Evaluation_V_I_ws +python3 laptop_receiver.py +# Starts listening on port 8888 +# Saves received files to: received_captures//// +``` + +--- + +## 10. Testing — Simulate the Full BT Flow + +Run this **instead of** `test_inspection_service.py` when you want to test the full pipeline including HTTP upload: + +```bash +# On Jetson — after all 4 services are running: +source /opt/ros/humble/setup.bash +source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash + +# Test gauge inspection + upload to laptop +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_bt_full_flow.py \ + --object gauge --location engine_room_A + +# Test fire extinguisher +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_bt_full_flow.py \ + --object fire_extinguisher --location corridor_B + +# What you should see: +# ── STEP 1: Visual Inspection ── +# ✓ Inspection complete — image_paths=[...] +# ── STEP 2: Upload Images to Laptop ── +# ✓ Upload complete — files at received_captures/engine_room_A/... +# ── BT RESULT ── +# ✓ BT node → SUCCESS +``` + +### Quick single-step tests (inspection only, no upload) + +```bash +# Inspect only — does NOT upload to laptop +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object gauge +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object fire_extinguisher +python3 ~/Documents/Visual_Inspection_ws/test_scripts/test_inspection_service.py --object door +``` + +--- + +## 11. Monitoring Topics (Debugging only) + +| Topic | Type | Description | +|---|---|---| +| `/visual_inspection/status` | `String` | `IDLE` / `DETECTING` / `COARSE` / `IBVS` / `CAPTURING` / `SWEEP` | +| `/visual_inspection/debug` | `Image` | Side-by-side Insta360 + Logitech (view in RViz2) | +| `/visual_inspection/ibvs_error` | `Point` | x/y pixel error during IBVS | +| `/visual_inspection/detections` | `String` | JSON of current detected objects | +| `/servo/pan_tilt` | `Int16MultiArray` | `[tilt, pan]` servo angles | + +> Skip these topics during real deployment to save CPU/GPU resources. + +--- + +## 12. File Locations + +| File | Purpose | +|---|---| +| `visual_inspection_interfaces/srv/Inspect.srv` | Service 1 definition | +| `visual_inspection_interfaces/srv/UploadImages.srv` | Service 2 definition | +| `visual_inspection_ros/inspection_service.py` | Service 1 server (Jetson) | +| `visual_inspection_ros/image_uploader.py` | Service 2 server (Jetson) | +| `Evaluation_V_I_ws/laptop_receiver.py` | HTTP receiver (runs on laptop) | +| `test_scripts/test_bt_full_flow.py` | Full BT flow test (inspect + upload) | +| `test_scripts/test_inspection_service.py` | Inspect-only test | diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/setup.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/setup.py index adc16ab1..b2d7afed 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/setup.py +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/setup.py @@ -19,11 +19,12 @@ license='MIT', entry_points={ 'console_scripts': [ - # Add only nodes that exist — add more as files are created - 'camera_node = visual_inspection_ros.camera_node:main', - 'servo_node = visual_inspection_ros.servo_node:main', - 'ibvs_action_server = visual_inspection_ros.ibvs_action_server:main', - 'run_inspection_bt = visual_inspection_ros.bt_nodes.inspection_bt_nodes:main', + 'camera_node = visual_inspection_ros.camera_node:main', + 'servo_node = visual_inspection_ros.servo_node:main', + 'ibvs_action_server = visual_inspection_ros.ibvs_action_server:main', + 'inspection_service = visual_inspection_ros.inspection_service:main', + 'image_uploader = visual_inspection_ros.image_uploader:main', + 'run_inspection_bt = visual_inspection_ros.bt_nodes.inspection_bt_nodes:main', ], }, ) diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/ibvs_action_server.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/ibvs_action_server.py index 6dc5044d..f8e87878 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/ibvs_action_server.py +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/ibvs_action_server.py @@ -97,15 +97,49 @@ def load_yolo(engine_path): class IBVSActionServer(Node): # Paths - ENGINE_PATH = os.path.expanduser('~/Documents/Visual_Inspection_ws/weights/yolo11n.engine') + ENGINE_PATH = os.path.expanduser('~/Documents/Visual_Inspection_ws/weights/yolov26s.engine') MQTT_CFG_PATH = os.path.expanduser('~/Documents/Visual_Inspection_ws/config/mqtt_config.yaml') - # YOLO confidence - CONF_INSTA = 0.5 # coarse detection - CONF_IBVS = 0.3 # IBVS (object may be partial/close) + # Known object classes the BT can request (BT name → YOLO class name) + # ACTUAL YOLO MODEL NAMES: {0: 'door', 1: 'extinguisher', 2: 'gauge', 3: 'person'} + # BT sends human-readable names; we normalize them to match YOLO class names + KNOWN_CLASSES = { + 'fire_extinguisher' : 'extinguisher', + 'fire extinguisher' : 'extinguisher', + 'extinguisher' : 'extinguisher', + 'door' : 'door', + 'person' : 'person', + 'people' : 'person', + 'gauge' : 'gauge', + 'pressure_gauge' : 'gauge', + 'main_cylinder' : 'main_cylinder', + 'unknown' : 'unknown', + 'any' : '', + '' : '', + } + + # Classes that only need overview capture (no IBVS, no pan-tilt) + # Servo goes home, captures BOTH Insta360 + Logitech overview + OVERVIEW_ONLY_CLASSES = {'unknown', 'main_cylinder'} + + # YOLO-trained target classes (full IBVS pipeline) + IBVS_CLASSES = {'extinguisher', 'door', 'person', 'gauge'} + + # Per-class confidence thresholds + # gauge is weak — lower threshold so we don't miss it + CLASS_CONF = { + 'extinguisher': 0.7, + 'door': 0.5, + 'person': 0.6, + 'gauge': 0.3, + } + CONF_DEFAULT = 0.5 # fallback for unknown classes + CONF_YOLO_MIN = 0.3 # YOLO runs at lowest needed threshold, post-filter per-class + CONF_IBVS = 0.3 # IBVS detection (Logitech) — run low, post-filter per-class # Timeouts INSTA_SEARCH_TIMEOUT = 20.0 # max wait for initial detection on Insta360 + GAUGE_INSTA_TIMEOUT = 8.0 # shorter timeout for gauge before sweep scan LOGI_FIRST_DET_WAIT = 5.0 # wait for first detection in Logitech after coarse IBVS_TOTAL_TIMEOUT = 40.0 # total IBVS time budget (seconds) IBVS_LOST_PATIENCE = 3.0 # how long object can vanish during IBVS @@ -125,7 +159,7 @@ class IBVSActionServer(Node): FX_LOGI = 640.0; FY_LOGI = 640.0 # Capture - IMAGES_PER_OBJ = 4 + IMAGES_PER_OBJ = 3 CAPTURE_DELAY = 0.5 FOCUS_WAIT = 10.0 # seconds to wait after IBVS for autofocus before capture KEEP_LOCAL = True # True = always keep images locally (dataset mode) @@ -180,10 +214,16 @@ def __init__(self): # State visible to debug timer self._yolo_lock = threading.Lock() self._mode = 'IDLE' + self._target_class = '' # '' = any class; set per goal from BT self._ibvs_ex = 0.0 self._ibvs_ey = 0.0 self._ibvs_iter = 0 self._ibvs_err = 0.0 + self._ibvs_time_s = 0.0 + self._ibvs_fps = 0.0 + self._coarse_time_s= 0.0 + self._pipeline_start = 0.0 + self._initial_ibvs_err = 0.0 # coarse accuracy: IBVS pixel error at iteration 0 self._fps_counter = 0 self._fps_time = time.time() self._fps = 0.0 @@ -270,19 +310,43 @@ def _home(self): # ---- YOLO detection ----------------------------------------------------- + # YOLO input resize — TRT engine compiled for 640x360 (same as original ibvs_pipeline.py) + YOLO_INPUT_H = 360 + def _detect_raw(self, frame, conf): """Run YOLO. Returns (detections, annotated_frame). Call with lock held. det tuple: (cx, cy, x1, y1, x2, y2, conf, cls_name) + + FIX: Original ibvs_pipeline.py ran YOLO on cv2.resize(frame_logi, (640,360)). + TensorRT engine was compiled for that input size. Running on raw 640x480 caused + bad detections. We resize to YOLO_INPUT_H=360, run YOLO, then scale coords back. """ if self.model is None or frame is None: return [], frame.copy() if frame is not None else None - results = self.model(frame, verbose=False, conf=conf)[0] - debug = frame.copy() + h_orig, w_orig = frame.shape[:2] + + # Resize to match TRT engine compiled size (640x360) + if h_orig != self.YOLO_INPUT_H: + yolo_frame = cv2.resize(frame, (w_orig, self.YOLO_INPUT_H)) + scale_y = h_orig / self.YOLO_INPUT_H # 480/360 = 1.333 + scale_x = 1.0 # width unchanged (both 640) + else: + yolo_frame = frame + scale_y = 1.0 + scale_x = 1.0 + + results = self.model(yolo_frame, verbose=False, conf=conf)[0] + debug = frame.copy() # annotate on original resolution frame dets = [] for box in results.boxes: x1, y1, x2, y2 = [int(v) for v in box.xyxy[0].tolist()] + + # Scale coords back to original frame size + x1 = int(x1 * scale_x); x2 = int(x2 * scale_x) + y1 = int(y1 * scale_y); y2 = int(y2 * scale_y) + cx = (x1 + x2) / 2.0 cy = (y1 + y2) / 2.0 c = float(box.conf[0]) @@ -294,6 +358,26 @@ def _detect_raw(self, frame, conf): cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) dets.sort(key=lambda d: d[6], reverse=True) + + # Post-filter: apply per-class confidence thresholds + filtered = [] + for d in dets: + cls = d[7].lower() + min_conf = self.CLASS_CONF.get(cls, self.CONF_DEFAULT) + if d[6] >= min_conf: + filtered.append(d) + dets = filtered + + # Filter to target class if specified + if self._target_class: + tc = self._target_class.lower() + before = len(dets) + dets = [d for d in dets if d[7].lower() == tc] + if before > 0 and len(dets) == 0: + self.get_logger().debug( + f' [filter] {before} dets → 0 after class filter "{tc}" ' + f'(classes seen: {[d[7] for d in filtered]})') + h, w = frame.shape[:2] cv2.line(debug, (w//2, 0), (w//2, h), (80, 80, 200), 1) cv2.line(debug, (0, h//2), (w, h//2), (80, 80, 200), 1) @@ -304,6 +388,7 @@ def _detect_raw(self, frame, conf): return dets, debug + def _detect_insta(self, conf=None): """Detect on Insta360 using YOLO tracking (ByteTrack). Returns (front_dets, back_dets, raw_frame, annotated_frame). @@ -311,7 +396,7 @@ def _detect_insta(self, conf=None): frame = self._get_insta() if frame is None: return [], [], None, None - c = conf or self.CONF_INSTA + c = conf or self.CONF_YOLO_MIN with self._yolo_lock: # Use track() for ByteTrack assignment try: @@ -365,6 +450,21 @@ def _detect_insta(self, conf=None): front.sort(key=lambda d: d[7] if d[7] >= 0 else d[6]) back.sort( key=lambda d: d[7] if d[7] >= 0 else d[6]) + # Post-filter: per-class confidence thresholds + for lst in (front, back): + lst[:] = [d for d in lst + if d[6] >= self.CLASS_CONF.get(d[8].lower(), self.CONF_DEFAULT)] + + # Filter to target class if specified + if self._target_class: + tc = self._target_class.lower() + before_f, before_b = len(front), len(back) + front = [d for d in front if d[8].lower() == tc] + back = [d for d in back if d[8].lower() == tc] + if (before_f + before_b) > 0 and len(front) + len(back) == 0: + self.get_logger().debug( + f' [insta filter] {before_f+before_b} dets → 0 after "{tc}"') + return front, back, frame, debug def _detect_logi(self, conf=None): @@ -386,6 +486,7 @@ def _detect_logi(self, conf=None): cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 200, 0), 1) return dets, frame, dbg + # ---- Combined debug publisher ------------------------------------------- def _build_debug_frame(self, insta_dbg, logi_dbg): @@ -437,7 +538,9 @@ def _publish_debug(self, insta_dbg, logi_dbg): self._fps_time = now def _debug_timer_cb(self): - """4Hz timer — publishes combined debug when no active goal.""" + """4Hz timer — publishes combined debug when no active goal. + Applies per-class confidence thresholds so debug view only shows + valid detections (e.g. person must be >=0.5, gauge >=0.3).""" if self._goal_active: return if not self._yolo_lock.acquire(blocking=False): @@ -445,21 +548,192 @@ def _debug_timer_cb(self): try: insta_f = self._get_insta() logi_f = self._get_logi() - _, insta_dbg = self._detect_raw(insta_f, self.CONF_INSTA) if insta_f is not None else ([], None) - _, logi_dbg = self._detect_raw(logi_f, self.CONF_INSTA) if logi_f is not None else ([], None) + # Run YOLO at lowest threshold, post-filter per-class + raw_insta, insta_dbg = self._detect_raw(insta_f, self.CONF_YOLO_MIN) \ + if insta_f is not None else ([], None) + raw_logi, logi_dbg = self._detect_raw(logi_f, self.CONF_YOLO_MIN) \ + if logi_f is not None else ([], None) finally: self._yolo_lock.release() + # Rebuild debug frames with per-class filtered boxes only + if insta_f is not None: + insta_dbg = self._draw_filtered(insta_f, raw_insta) + if logi_f is not None: + logi_dbg = self._draw_filtered(logi_f, raw_logi) + self._publish_debug(insta_dbg, logi_dbg) + def _draw_filtered(self, frame, dets): + """Draw only detections that pass per-class confidence threshold.""" + import cv2 + dbg = frame.copy() + h, w = dbg.shape[:2] + cv2.line(dbg, (w//2, 0), (w//2, h), (80, 80, 200), 1) + cv2.line(dbg, (0, h//2), (w, h//2), (80, 80, 200), 1) + for d in dets: + cls = d[7].lower() + conf = d[6] + min_conf = self.CLASS_CONF.get(cls, self.CONF_DEFAULT) + if conf < min_conf: + continue # skip — below class threshold + x1, y1, x2, y2 = int(d[2]), int(d[3]), int(d[4]), int(d[5]) + cx, cy = int(d[0]), int(d[1]) + cv2.rectangle(dbg, (x1, y1), (x2, y2), (0, 255, 0), 2) + cv2.putText(dbg, f'{cls} {conf:.2f}', (x1, max(y1-5, 10)), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) + cv2.circle(dbg, (cx, cy), 5, (0, 0, 255), -1) + return dbg + + # ---- Gauge sweep scan (fallback when Insta360 misses gauge) -------------- + + # Gauge sweep — smooth serpentine: tilt=[40,70,100], pan=[20↔160] + SWEEP_TILTS = [40, 70, 100] # tilt row positions (degrees) + SWEEP_PAN_RANGE = (20, 160) # pan extent + SWEEP_INTERP_HZ = 30 # servo update rate during sweep + SWEEP_DEG_PER_S = 40.0 # degrees per second (smooth speed) + SWEEP_CHECK_HZ = 10 # detection checks per second during sweep + + def _sweep_move(self, from_tilt, from_pan, to_tilt, to_pan, + check_fn, check_cancel_fn): + """ + Smoothly interpolate servo from (from_tilt,from_pan) to (to_tilt,to_pan) + at SWEEP_DEG_PER_S degrees/s. Calls check_fn() every 1/SWEEP_CHECK_HZ s. + Returns det list immediately if check_fn() finds something, else []. + """ + total_deg = max(abs(to_pan - from_pan), abs(to_tilt - from_tilt), 1) + duration = total_deg / self.SWEEP_DEG_PER_S # seconds + n_steps = max(int(duration * self.SWEEP_INTERP_HZ), 1) + check_every = max(1, self.SWEEP_INTERP_HZ // self.SWEEP_CHECK_HZ) + + for i in range(n_steps + 1): + if check_cancel_fn(): + return None # cancelled + t = i / n_steps + pan = from_pan + t * (to_pan - from_pan) + tilt = from_tilt + t * (to_tilt - from_tilt) + self._servo(tilt, pan) + + # Check Logitech periodically during move + if i % check_every == 0: + dets, _, logi_dbg = self._detect_logi() + insta_f = self._get_insta() + self._publish_debug(insta_f, logi_dbg) + if dets: + return dets + + time.sleep(1.0 / self.SWEEP_INTERP_HZ) + + return [] # reached target, nothing found + + def _gauge_sweep_scan(self, goal_handle, feedback, session_ts): + """ + Smooth serpentine sweep for gauge: + tilt=40 → pan 20→160 + tilt=70 → pan 160→20 + tilt=100 → pan 20→160 + Servo moves smoothly. Detection checked during motion. + Returns Result on success, None if gauge not found. + """ + self._mode = 'SWEEP' + self.get_logger().info( + ' Gauge sweep scan (smooth): tilt=[40,70,100] pan=[20↔160]') + feedback.current_step = 'detecting' + goal_handle.publish_feedback(feedback) + + def cancel(): return goal_handle.is_cancel_requested + + cur_tilt = 40 + cur_pan = 20 + + # Move to start position smoothly from home + self._sweep_move(90, 90, cur_tilt, cur_pan, lambda: [], cancel) + time.sleep(0.3) + + for sweep_idx, tilt in enumerate(self.SWEEP_TILTS): + # Serpentine direction + pan_end = self.SWEEP_PAN_RANGE[1] if sweep_idx % 2 == 0 \ + else self.SWEEP_PAN_RANGE[0] + + # 1. Move tilt to row position first (current pan → same pan, new tilt) + res = self._sweep_move(cur_tilt, cur_pan, tilt, cur_pan, lambda: [], cancel) + if res is None: + return None # cancelled + cur_tilt = tilt + + # 2. Pan sweep across row + res = self._sweep_move(cur_tilt, cur_pan, cur_tilt, pan_end, lambda: [], cancel) + if res is None: + return None # cancelled + + # Check if detection happened during sweep + dets, _, logi_dbg = self._detect_logi() + insta_f = self._get_insta() + self._publish_debug(insta_f, logi_dbg) + + if res or dets: + found_dets = res if res else dets + self.get_logger().info( + f' SWEEP: gauge found at approx tilt={tilt} ' + f'pan={cur_pan}→{pan_end}! Starting IBVS') + self._pub_status('IBVS') + + # Use current servo position as IBVS start + centred = self._ibvs(goal_handle, cur_pan, cur_tilt, + feedback, 1) + if centred: + self._pub_status('CAPTURING') + self.get_logger().info( + f' Waiting {self.FOCUS_WAIT}s for autofocus...') + time.sleep(self.FOCUS_WAIT) + + conf = found_dets[0][6] if found_dets else 0.3 + paths = self._capture( + self.IMAGES_PER_OBJ, obj_id=1, + session_ts=session_ts, cls_name='gauge', + conf_score=conf) + overview_paths = self._capture_insta_overview( + session_ts, 'sweep_gauge', count=1, + folder='inspection', obj_cls='gauge', obj_id=1) + + all_paths = paths + overview_paths + self._mqtt(all_paths, 1, session_ts=session_ts, + cls_name='gauge') + self.get_logger().info( + f' Sweep gauge done: {len(all_paths)} images') + + result = InspectObjects.Result() + result.success = True + result.objects_inspected = 1 + result.objects_found = 1 + result.object_in_back = False + result.failed_reason = '' + self._pub_status('IDLE') + self._goal_active = False + self._home() + goal_handle.succeed() + return result + else: + self.get_logger().warn(' Sweep IBVS did not converge — continuing') + + cur_pan = pan_end + + self.get_logger().warn(' Gauge sweep complete — gauge not found') + return None + # ---- Stage 1: search Insta360 ------------------------------------------- def _search_insta(self, goal_handle): - """Search Insta360 up to 20s. + """Search Insta360 up to timeout. + For gauge: uses shorter timeout (GAUGE_INSTA_TIMEOUT) before sweep scan. Returns (front_dets, back_dets) or (None, None) on timeout.""" self._mode = 'DETECTING' - self.get_logger().info(f' Searching Insta360 (up to {self.INSTA_SEARCH_TIMEOUT}s)...') - deadline = time.time() + self.INSTA_SEARCH_TIMEOUT + # Gauge gets shorter timeout — we'll fall back to sweep scan + timeout = (self.GAUGE_INSTA_TIMEOUT + if self._target_class == 'gauge' + else self.INSTA_SEARCH_TIMEOUT) + self.get_logger().info(f' Searching Insta360 (up to {timeout}s)...') + deadline = time.time() + timeout while time.time() < deadline: if goal_handle.is_cancel_requested: @@ -480,7 +754,7 @@ def _search_insta(self, goal_handle): time.sleep(0.1) - self.get_logger().warn(' No objects found on Insta360 within timeout') + self.get_logger().warn(f' No objects found on Insta360 within {timeout}s') return None, None # ---- Stage 2: IBVS on Logitech ------------------------------------------ @@ -500,15 +774,15 @@ def _ibvs(self, goal_handle, start_pan, start_tilt, feedback, obj_idx): last_seen_time = time.time() start_time = time.time() ibvs_iter = 0 - insta_dbg_cache = None # freeze Insta360 side during IBVS + n_det_frames = 0 # count frames where object was detected + insta_dbg_cache = None - # Cache last Insta360 debug frame _, _, _, insta_dbg_cache = self._detect_insta() - deadline = start_time + self.IBVS_TOTAL_TIMEOUT while time.time() < deadline: if goal_handle.is_cancel_requested: + self._ibvs_time_s = time.time() - start_time return False dets, _, logi_dbg = self._detect_logi() @@ -519,18 +793,31 @@ def _ibvs(self, goal_handle, start_pan, start_tilt, feedback, obj_idx): if not dets: if now - last_seen_time > self.IBVS_LOST_PATIENCE: self.get_logger().warn(f' IBVS: object not seen for {now-last_seen_time:.1f}s -- aborting') + self._ibvs_time_s = now - start_time return False self.get_logger().info(f' IBVS: object not seen ({now-last_seen_time:.1f}s) -- waiting...') time.sleep(dt) continue last_seen_time = now + n_det_frames += 1 + + if self._target_class: + tc = self._target_class.lower() + dets = [d for d in dets if d[7].lower() == tc] + if not dets: + time.sleep(dt) + continue + cx_d, cy_d = dets[0][0], dets[0][1] ex = cx_d - self.CX_LOGI ey = cy_d - self.CY_LOGI err = (ex**2 + ey**2) ** 0.5 - # Update overlay state + # Capture coarse accuracy — the error at the very first detection + if ibvs_iter == 0: + self._initial_ibvs_err = err + self._ibvs_ex = ex self._ibvs_ey = ey self._ibvs_iter = ibvs_iter @@ -542,7 +829,19 @@ def _ibvs(self, goal_handle, start_pan, start_tilt, feedback, obj_idx): goal_handle.publish_feedback(feedback) if err < self.IBVS_TOL_PX: - self.get_logger().info(f' IBVS converged: err={err:.1f}px at iter {ibvs_iter}') + elapsed = time.time() - start_time + self._ibvs_time_s = elapsed + # ibvs_iter = number of PID loop steps completed + # n_det_frames = frames where object was detected (may be 0 on instant converge) + # Use ibvs_iter/elapsed as fps (more stable) + if elapsed > 0: + self._ibvs_fps = round( + (n_det_frames if n_det_frames > 0 else ibvs_iter) / elapsed, 1) + else: + self._ibvs_fps = 0.0 + self.get_logger().info( + f' IBVS converged: err={err:.1f}px at iter {ibvs_iter} ' + f'time={elapsed:.2f}s fps={self._ibvs_fps}') return True # Angular errors @@ -556,17 +855,14 @@ def _ibvs(self, goal_handle, start_pan, start_tilt, feedback, obj_idx): theta_y = 0.0 integral_tilt = 0.0 - # PID pan integral_pan = np.clip(integral_pan + theta_x * dt, -50, 50) dp = -(self.KP*theta_x + self.KI*integral_pan + self.KD*(theta_x-prev_ep)/dt) prev_ep = theta_x - # PID tilt integral_tilt = np.clip(integral_tilt + theta_y * dt, -50, 50) dt_ = -(self.KP*theta_y + self.KI*integral_tilt + self.KD*(theta_y-prev_et)/dt) prev_et = theta_y - # Velocity limit sp = min(1.0, abs(ex)/self.SLOW_ZONE_PX) st = min(1.0, abs(ey)/self.SLOW_ZONE_PX) dp = np.clip(dp, -self.MAX_STEP_DEG*sp, self.MAX_STEP_DEG*sp) @@ -582,14 +878,21 @@ def _ibvs(self, goal_handle, start_pan, start_tilt, feedback, obj_idx): ibvs_iter += 1 time.sleep(dt) - self.get_logger().warn(f' IBVS timeout after {self.IBVS_TOTAL_TIMEOUT}s') + elapsed = time.time() - start_time + self._ibvs_time_s = elapsed + self._ibvs_fps = round(n_det_frames / elapsed, 1) if elapsed > 0 else 0.0 + self.get_logger().warn(f' IBVS timeout after {elapsed:.1f}s') return False + # ---- Image capture (local save) ---------------------------------------- - def _capture(self, n=4, obj_id=1, session_ts='', cls_name='object'): + def _capture(self, n=4, obj_id=1, session_ts='', cls_name='object', + conf_score=0.0, ibvs_err=0.0, ibvs_iter=0, ibvs_converged=False, + location_label=''): """Capture n images from Logitech, save to inspection/ folder. Folder: captures/inspection/session_ts/cls_name/instance_N/ + Saves metadata.json with confidence + IBVS stats for evaluation CSV. Returns list of saved file paths.""" base = Path(os.path.expanduser( self._mqtt_cfg.get('capture_dir', '~/Documents/Visual_Inspection_ws/captures') @@ -601,17 +904,55 @@ def _capture(self, n=4, obj_id=1, session_ts='', cls_name='object'): for i in range(n): f = self._get_logi() if f is not None: - fpath = cap_dir / f'img_{i+1:02d}.jpg' + fpath = cap_dir / f'img_{i+1:02d}_conf{conf_score:.2f}.jpg' q = self._mqtt_cfg.get('jpeg_quality', 85) cv2.imwrite(str(fpath), f, [cv2.IMWRITE_JPEG_QUALITY, q]) paths.append(fpath) self.get_logger().info(f' Saved {fpath.name}') time.sleep(self.CAPTURE_DELAY) + + # Real wall-clock ibvs_time_s (measured in _ibvs, not estimated) + # Use getattr() fallbacks so stale __init__ on Jetson never causes AttributeError + _ibvs_time = round(float(getattr(self, '_ibvs_time_s', 0.0)), 3) + _ibvs_fps = round(float(getattr(self, '_ibvs_fps', 0.0)), 1) + _coarse_time = round(float(getattr(self, '_coarse_time_s', 0.0)), 3) + _initial_err = round(float(getattr(self, '_initial_ibvs_err', 0.0)), 2) + _pipe_start = getattr(self, '_pipeline_start', 0.0) + _pipeline_time = round(time.time() - _pipe_start, 2) if _pipe_start > 0 else 0.0 + + meta = { + 'class': cls_name, + 'instance_id': obj_id, + 'confidence': round(conf_score, 4), + 'session': session_ts, + 'location_label': location_label, + 'num_images': len(paths), + 'camera': 'logitech', + 'ibvs_converged': ibvs_converged, + 'ibvs_error_px': round(float(ibvs_err), 3), + 'ibvs_time_s': _ibvs_time, + 'ibvs_iterations': ibvs_iter, + 'ibvs_fps': _ibvs_fps, + 'coarse_time_s': _coarse_time, + 'initial_ibvs_error_px': _initial_err, + 'pipeline_time_s': _pipeline_time, + } + + meta_path = cap_dir / 'metadata.json' + with open(meta_path, 'w') as f: + json.dump(meta, f, indent=2) + self.get_logger().info( + f' Metadata saved: conf={conf_score:.4f} ' + f'ibvs_converged={ibvs_converged} err={ibvs_err:.1f}px ' + f'time={self._ibvs_time_s}s') + + return paths + def _capture_insta_overview(self, session_ts, location_label, count=1, folder='inspection', obj_cls='', obj_id=1): - """Capture Insta360 frames with YOLO drawn. + """Capture Insta360 RAW frames (no bounding boxes). For ROI inspection: folder='inspection', saved alongside ROI images. For BT overview request: folder='overview', standalone. Returns list of saved file paths.""" @@ -627,14 +968,15 @@ def _capture_insta_overview(self, session_ts, location_label, count=1, q = self._mqtt_cfg.get('jpeg_quality', 85) paths = [] for i in range(count): - _, _, _, dbg = self._detect_insta() - if dbg is not None: + # Save RAW frame — no bounding boxes + raw = self._get_insta() + if raw is not None: lbl = location_label.replace(' ', '_') or 'unknown' fname = f'overview_{lbl}_{i+1:02d}.jpg' fpath = cap_dir / fname - cv2.imwrite(str(fpath), dbg, [cv2.IMWRITE_JPEG_QUALITY, q]) + cv2.imwrite(str(fpath), raw, [cv2.IMWRITE_JPEG_QUALITY, q]) paths.append(fpath) - self.get_logger().info(f' Insta360 overview saved: {fpath.name}') + self.get_logger().info(f' Insta360 overview saved (raw): {fpath.name}') time.sleep(0.3) return paths @@ -753,11 +1095,30 @@ def execute_callback(self, goal_handle): feedback = InspectObjects.Feedback() result = InspectObjects.Result() - max_obj = goal_handle.request.max_objects - ret_home = goal_handle.request.return_home - overview_only = goal_handle.request.overview_only + max_obj = goal_handle.request.max_objects + ret_home = goal_handle.request.return_home + overview_only = goal_handle.request.overview_only location_label = goal_handle.request.location_label or 'unknown' overview_count = goal_handle.request.overview_count or 2 + target_obj_raw = getattr(goal_handle.request, 'target_object', '') or '' + + # Normalise target_object → YOLO class name + resolved_target = self.KNOWN_CLASSES.get( + target_obj_raw.lower().strip(), target_obj_raw.lower().strip()) + + # Decide mode: overview-only for unknown/main_cylinder, full IBVS for trained classes + if resolved_target in self.OVERVIEW_ONLY_CLASSES: + overview_only = True + self._target_class = '' # no YOLO filter — just capture overview + self.get_logger().info( + f' Target "{target_obj_raw}" → OVERVIEW-ONLY mode (no pan-tilt/IBVS)') + elif resolved_target: + self._target_class = resolved_target + self.get_logger().info( + f' Target object filter: "{target_obj_raw}" → YOLO class "{self._target_class}"') + else: + self._target_class = '' + self.get_logger().info(' No target object filter — detecting all classes') def abort(reason, in_back=False, found=0): self._pub_status('IDLE') @@ -774,30 +1135,59 @@ def abort(reason, in_back=False, found=0): session_ts = datetime.now().strftime('%Y%m%d_%H%M%S') - # ── OVERVIEW-ONLY MODE: bypass IBVS, just capture Insta360 snapshots ── + # ── OVERVIEW-ONLY MODE: for unknown/main_cylinder ── + # Servo goes home (90,90), captures BOTH Insta360 + Logitech overview + # No pan-tilt, no IBVS — we can't identify these objects with YOLO if overview_only: + label = target_obj_raw or location_label self.get_logger().info( - f' Overview-only: capturing {overview_count} Insta360 frames ' - f'(label: {location_label})') + f' Overview-only ({label}): servo home → capture both cameras') self._pub_status('OVERVIEW') feedback.current_step = 'overview' goal_handle.publish_feedback(feedback) - paths = self._capture_insta_overview( - session_ts, location_label, count=overview_count, folder='overview') - self._mqtt(paths, obj_id=0, session_ts=session_ts, cls_name=location_label) + + # Move servo to home position + self._home() + time.sleep(2.0) # wait for servo to settle + + # Capture Insta360 overview images + insta_paths = self._capture_insta_overview( + session_ts, label, count=overview_count, folder='overview') + + # Also capture 1 Logitech image from home position + logi_paths = self._capture( + n=1, obj_id=1, + session_ts=session_ts, cls_name=label, conf_score=0.0) + + all_paths = insta_paths + logi_paths + self.get_logger().info( + f' Overview done: {len(insta_paths)} Insta360 + ' + f'{len(logi_paths)} Logitech = {len(all_paths)} total') + self._mqtt(all_paths, obj_id=0, session_ts=session_ts, cls_name=label) + self._pub_status('IDLE') self._goal_active = False - result.success = len(paths) > 0 + result.success = len(all_paths) > 0 result.objects_inspected = 0 - result.objects_found = len(paths) + result.objects_found = len(all_paths) result.object_in_back = False - result.failed_reason = '' if paths else 'no_frames' + result.failed_reason = '' if all_paths else 'no_frames' goal_handle.succeed() return result # ── FULL INSPECTION MODE ────────────────────────────────────────────── front_dets, back_dets = self._search_insta(goal_handle) + # ── GAUGE SWEEP SCAN: if gauge not found on Insta360, sweep with Logitech ── + if (front_dets is None or (not front_dets and not back_dets)) and self._target_class == 'gauge': + self.get_logger().info(' Gauge not found on Insta360 — starting sweep scan with Logitech') + sweep_result = self._gauge_sweep_scan(goal_handle, feedback, session_ts) + if sweep_result is not None: + # sweep_result is the final Result + return sweep_result + # If sweep also failed, fall through to abort + return abort('no_detection') + if front_dets is None and back_dets is None: return abort('no_detection') @@ -813,8 +1203,21 @@ def abort(reason, in_back=False, found=0): if max_obj > 0: front_dets = front_dets[:max_obj] + # Number objects within each class by confidence (highest = 1) + # e.g., 2 gauges: gauge #1 (conf=0.92), gauge #2 (conf=0.78) + class_counters = {} # {cls_name: count} + numbered_dets = [] + for det in sorted(front_dets, key=lambda d: d[6], reverse=True): + cls = det[8] # cls_name field (index 8 in Insta360 det tuple) + class_counters[cls] = class_counters.get(cls, 0) + 1 + numbered_dets.append((*det, class_counters[cls])) # append instance_num + # Re-sort by original order (front_dets was sorted by track_id) + front_dets = numbered_dets + self.get_logger().info( f' Processing {len(front_dets)} front object(s) in order') + for cls, cnt in class_counters.items(): + self.get_logger().info(f' {cls}: {cnt} instance(s)') if back_dets: self.get_logger().info( f' {len(back_dets)} back object(s) noted -- will signal BT after front done') @@ -824,10 +1227,10 @@ def abort(reason, in_back=False, found=0): if goal_handle.is_cancel_requested: break - cx_obj, cy_obj, *_, conf, tid, cls_name = det + cx_obj, cy_obj, *_, conf, tid, cls_name, instance_num = det self.get_logger().info( - f'Object {obj_idx}/{len(front_dets)}: [{cls_name}] cx={cx_obj:.0f} ' - f'cy={cy_obj:.0f} conf={conf:.2f} track_id={tid}') + f'Object {obj_idx}/{len(front_dets)}: [{cls_name} #{instance_num}] ' + f'cx={cx_obj:.0f} cy={cy_obj:.0f} conf={conf:.2f} track_id={tid}') # ---- Coarse positioning ---- self._pub_status('COARSE') @@ -881,21 +1284,27 @@ def abort(reason, in_back=False, found=0): self.get_logger().info(f' Waiting {self.FOCUS_WAIT}s for autofocus...') time.sleep(self.FOCUS_WAIT) - # Capture 4 Logitech ROI images - paths = self._capture(self.IMAGES_PER_OBJ, obj_id=obj_idx, - session_ts=session_ts, cls_name=cls_name) + # Capture 4 Logitech ROI images (include confidence in filename) + paths = self._capture(self.IMAGES_PER_OBJ, obj_id=instance_num, + session_ts=session_ts, cls_name=cls_name, + conf_score=conf, + ibvs_err=self._ibvs_err, + ibvs_iter=self._ibvs_iter, + ibvs_converged=centred) # Also capture Insta360 overview with YOLO boxes overview_paths = self._capture_insta_overview( session_ts, location_label, count=1, - folder='inspection', obj_cls=cls_name, obj_id=obj_idx) + folder='inspection', obj_cls=cls_name, obj_id=instance_num) all_paths = paths + overview_paths self.get_logger().info( - f' Captured {len(paths)} ROI + {len(overview_paths)} overview = {len(all_paths)} total') - self._mqtt(all_paths, obj_idx, session_ts=session_ts, cls_name=cls_name) + f' Captured {len(paths)} ROI + {len(overview_paths)} overview ' + f'= {len(all_paths)} total [{cls_name} #{instance_num} conf={conf:.2f}]') + self._mqtt(all_paths, instance_num, session_ts=session_ts, cls_name=cls_name) n_inspected += 1 - self.get_logger().info(f' Object {obj_idx} done ({n_inspected} total)') + self.get_logger().info( + f' Object {obj_idx} ({cls_name} #{instance_num}) done ({n_inspected} total)') # ---- Done ---- self._pub_status('IDLE') diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/image_uploader.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/image_uploader.py new file mode 100644 index 00000000..196cd7a4 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/image_uploader.py @@ -0,0 +1,151 @@ +""" +image_uploader.py — ROS2 Service Node +====================================== +Service: /visual_inspection/upload_images +Type: visual_inspection_interfaces/srv/UploadImages + +BT calls this AFTER /visual_inspection/inspect returns image_paths. +This node reads each image file + its metadata.json from disk, then +HTTP-POSTs them to the laptop receiver server running on the same network. + +Laptop server URL is read from the ROS2 param 'laptop_url'. +Set it when launching: + ros2 run visual_inspection_ros image_uploader \ + --ros-args -p laptop_url:=http://192.168.1.XXX:8888/upload +""" + +import os +import json +import pathlib +import rclpy +from rclpy.node import Node +import requests +from visual_inspection_interfaces.srv import UploadImages + + +class ImageUploaderNode(Node): + + def __init__(self): + super().__init__('image_uploader') + + # ── Laptop HTTP server URL ───────────────────────────────────────── + # Default port 8888 — must match laptop_receiver.py + # Override with: --ros-args -p laptop_url:=http://192.168.X.X:8888/upload + self.declare_parameter('laptop_url', 'http://192.168.8.62:8888/upload') + self.laptop_url = self.get_parameter('laptop_url').get_parameter_value().string_value + + # ── ROS2 Service ─────────────────────────────────────────────────── + self.srv = self.create_service( + UploadImages, + '/visual_inspection/upload_images', + self._handle_upload + ) + + self.get_logger().info(f'Image uploader ready — target: {self.laptop_url}') + self.get_logger().info('Service: /visual_inspection/upload_images') + + # ────────────────────────────────────────────────────────────────────── + def _handle_upload(self, request, response): + """ + Called by BT. Reads each file path from request.image_paths, + finds metadata.json in the same folder, and POSTs everything to laptop. + """ + image_paths = list(request.image_paths) + session_label = request.session_label or 'inspection' + + if not image_paths: + response.success = False + response.info = 'No image paths provided' + response.uploaded_count = 0 + return response + + self.get_logger().info( + f'[UPLOAD] {len(image_paths)} paths → {self.laptop_url} label={session_label}') + + uploaded = 0 + errors = [] + + # Collect unique parent folders (each instance folder has one metadata.json) + instance_folders = {str(pathlib.Path(p).parent) for p in image_paths} + + for folder in instance_folders: + folder_path = pathlib.Path(folder) + + # ── Gather files to send ─────────────────────────────────────── + images_in_folder = sorted(folder_path.glob('img_*.jpg')) + meta_file = folder_path / 'metadata.json' + + files_to_send = [] + + for img in images_in_folder: + if img.exists(): + files_to_send.append( + ('files', (img.name, open(img, 'rb'), 'image/jpeg')) + ) + + if meta_file.exists(): + files_to_send.append( + ('files', ('metadata.json', open(meta_file, 'rb'), 'application/json')) + ) + + if not files_to_send: + errors.append(f'No files found in {folder}') + continue + + # ── HTTP POST ───────────────────────────────────────────────── + data = { + 'session_label': session_label, + 'subfolder': folder_path.name, # e.g. "instance_1" + 'object_class': folder_path.parent.name, # e.g. "gauge" + } + + try: + resp = requests.post( + self.laptop_url, + files=files_to_send, + data=data, + timeout=30 + ) + if resp.status_code == 200: + uploaded += len(images_in_folder) + (1 if meta_file.exists() else 0) + self.get_logger().info(f'[UPLOAD] ✓ {folder_path.name} — {resp.json()}') + else: + errors.append(f'{folder_path.name}: HTTP {resp.status_code}') + self.get_logger().warn(f'[UPLOAD] ✗ {folder_path.name}: {resp.status_code}') + except requests.exceptions.ConnectionError: + errors.append('Cannot reach laptop — is laptop_receiver.py running?') + self.get_logger().error( + f'[UPLOAD] Connection failed — is laptop running? URL={self.laptop_url}') + break + except requests.exceptions.Timeout: + errors.append(f'{folder_path.name}: timeout (>30s)') + self.get_logger().error(f'[UPLOAD] Timeout uploading {folder_path.name}') + finally: + # Close all file handles + for _, (_, fh, _) in files_to_send: + if hasattr(fh, 'close'): + fh.close() + + # ── Build response ──────────────────────────────────────────────── + if errors: + response.success = False + response.info = f'Errors: {"; ".join(errors)}' + else: + response.success = True + response.info = f'Uploaded {uploaded} files ({len(instance_folders)} folders) to laptop' + + response.uploaded_count = uploaded + self.get_logger().info(f'[UPLOAD] Done — success={response.success} | {response.info}') + return response + + +def main(args=None): + rclpy.init(args=args) + node = ImageUploaderNode() + rclpy.spin(node) + node.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/inspection_service.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/inspection_service.py new file mode 100644 index 00000000..2ea5341a --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/inspection_ws/visual_inspection_ros/visual_inspection_ros/inspection_service.py @@ -0,0 +1,478 @@ +#!/usr/bin/env python3 +""" +inspection_service.py — ROS2 Service Server for visual inspection. + +Subclasses IBVSActionServer to reuse all detection/IBVS/capture logic. +Exposes a single service endpoint: + /visual_inspection/inspect [visual_inspection_interfaces/srv/Inspect] + +BT calls this like an API endpoint — no feedback, just request + response. +Monitoring topics (optional, for debugging): + /visual_inspection/status String + /visual_inspection/debug Image (add in RViz2) + /visual_inspection/detections String +""" + +import rclpy +from rclpy.node import Node +from rclpy.callback_groups import ReentrantCallbackGroup +import threading +import time +from datetime import datetime + +from std_msgs.msg import Int16MultiArray, String +from sensor_msgs.msg import Image +from geometry_msgs.msg import Point +from cv_bridge import CvBridge + +from visual_inspection_interfaces.srv import Inspect + +# Reuse all pipeline internals from the action server module +from visual_inspection_ros.ibvs_action_server import ( + IBVSActionServer, load_yolo, calculate_pan, calculate_tilt, clamp +) + + +class InspectionService(IBVSActionServer): + """ + Wraps IBVSActionServer as a ROS2 Service. + Inherits all detection, IBVS, capture, sweep-scan logic. + Removes the ActionServer; adds a Service server instead. + """ + + def __init__(self): + # Call Node.__init__ directly — skip IBVSActionServer.__init__ + # so we don't create an ActionServer + Node.__init__(self, 'inspection_service') + + self.bridge = CvBridge() + self.cb_group = ReentrantCallbackGroup() + + import threading + self._lock_insta = threading.Lock() + self._lock_logi = threading.Lock() + self._frame_insta = None + self._frame_logi = None + + self.create_subscription(Image, '/visual_inspection/insta360/image_raw', + self._cb_insta, 10, callback_group=self.cb_group) + self.create_subscription(Image, '/visual_inspection/logitech/image_raw', + self._cb_logi, 10, callback_group=self.cb_group) + + self.servo_pub = self.create_publisher(Int16MultiArray, '/servo/pan_tilt', 10) + self.debug_pub = self.create_publisher(Image, '/visual_inspection/debug', 10) + self.status_pub = self.create_publisher(String, '/visual_inspection/status', 10) + self.ibvs_err_pub = self.create_publisher(Point, '/visual_inspection/ibvs_error', 10) + self.detections_pub = self.create_publisher(String, '/visual_inspection/detections', 10) + + self._mqtt_cfg = self._load_mqtt_cfg() + self._yolo_lock = threading.Lock() + self._mode = 'IDLE' + self._target_class = '' + self._ibvs_ex = self._ibvs_ey = self._ibvs_err = 0.0 + self._ibvs_iter = 0 + self._fps_counter = 0 + self._fps_time = time.time() + self._fps = 0.0 + self._goal_active = False + + # Service lock — only one inspection at a time + self._svc_lock = threading.Lock() + + self.get_logger().info('Loading YOLO model...') + self.model = load_yolo(self.ENGINE_PATH) + if self.model: + self.get_logger().info('YOLO model ready') + else: + self.get_logger().error('YOLO load failed') + + # ROS2 Service (replaces ActionServer) + self.create_service( + Inspect, '/visual_inspection/inspect', + self._service_callback, + callback_group=self.cb_group + ) + + # Debug timer (4Hz) — inherited method works as-is + self.create_timer(0.25, self._debug_timer_cb) + + self.get_logger().info('Inspection service ready: /visual_inspection/inspect') + self.get_logger().info('Monitor: /visual_inspection/status /visual_inspection/debug') + + # ------------------------------------------------------------------ + # Service callback — runs the full pipeline, returns when done + # ------------------------------------------------------------------ + + def _service_callback(self, request, response): + """Handles one inspection request. Blocks until complete.""" + + if not self._svc_lock.acquire(blocking=False): + self.get_logger().warn('Service busy — rejecting request') + response.success = False + response.status = 'busy' + response.info = 'Another inspection is already running' + return response + + try: + return self._run_inspection(request, response) + finally: + self._svc_lock.release() + + def _run_inspection(self, request, response): + """Core pipeline — same logic as execute_callback but for services.""" + self._goal_active = True + self._pipeline_start = time.time() # for pipeline_time_s in metadata.json + session_ts = datetime.now().strftime('%Y%m%d_%H%M%S') + + target_obj_raw = (request.target_object or '').strip() + location_label = (request.location_label or 'unknown').strip() + max_obj = request.max_objects or 0 + ret_home = request.return_home + + # Normalise → YOLO class name + resolved = self.KNOWN_CLASSES.get( + target_obj_raw.lower(), target_obj_raw.lower()) + + if resolved in self.OVERVIEW_ONLY_CLASSES: + self._target_class = '' + self.get_logger().info(f'[SVC] Overview-only: {target_obj_raw}') + return self._do_overview(request, response, session_ts, + target_obj_raw or location_label, ret_home) + + self._target_class = resolved + self._current_location_label = location_label # used by sweep _capture() + self.get_logger().info( + f'[SVC] Inspect "{target_obj_raw}" → YOLO "{self._target_class}" ' + f'loc="{location_label}"') + + # --- Stage 1: Insta360 search --- + front_dets, back_dets = self._search_insta_svc() + + # --- Gauge sweep fallback --- + if (not front_dets and not back_dets) and self._target_class == 'gauge': + self.get_logger().info('[SVC] Gauge sweep scan starting') + swept = self._gauge_sweep_svc(session_ts) + self._goal_active = False + self._pub_status('IDLE') + if ret_home: + self._home() + if swept: + response.success = True + response.status = 'ok' + response.objects_found = 1 + response.objects_inspected = 1 + response.object_in_back = False + response.image_paths = [str(p) for p in swept] + response.info = f'gauge found via sweep — {len(swept)} images' + else: + response.success = False + response.status = 'no_detection' + response.info = 'gauge not found (Insta360 + sweep)' + return response + + if not front_dets and not back_dets: + self._goal_active = False + self._pub_status('IDLE') + if ret_home: + self._home() + response.success = False + response.status = 'no_detection' + response.info = 'no objects detected on Insta360' + return response + + self._pub_detections(front_dets or [], back_dets or []) + total_found = len(front_dets or []) + len(back_dets or []) + + if not front_dets and back_dets: + self._goal_active = False + self._pub_status('IDLE') + response.success = False + response.status = 'all_in_back' + response.objects_found = total_found + response.object_in_back = True + response.info = f'{total_found} object(s) in back — rotate robot 180°' + return response + + if max_obj > 0: + front_dets = front_dets[:max_obj] + + # Number by confidence within class + class_counters = {} + numbered = [] + for det in sorted(front_dets, key=lambda d: d[6], reverse=True): + cls = det[8] + class_counters[cls] = class_counters.get(cls, 0) + 1 + numbered.append((*det, class_counters[cls])) + front_dets = numbered + + all_paths = [] + n_inspected = 0 + info_parts = [] + + for obj_idx, det in enumerate(front_dets, 1): + cx_obj, cy_obj, *_, conf, tid, cls_name, instance_num = det + + # Coarse — timed + self._pub_status('COARSE') + _coarse_t0 = time.time() + pan_c = calculate_pan(cx_obj, cy_obj) + tilt_c = calculate_tilt(cx_obj, cy_obj) + self._servo(tilt_c, pan_c) + self.get_logger().info( + f'[SVC] Object {obj_idx}: [{cls_name}#{instance_num}] ' + f'coarse pan={pan_c:.1f} tilt={tilt_c:.1f}') + time.sleep(self.COARSE_WAIT) + self._coarse_time_s = round(time.time() - _coarse_t0, 3) + + # Wait for Logitech + self._pub_status('IBVS') + deadline = time.time() + self.LOGI_FIRST_DET_WAIT + first_det = False + while time.time() < deadline: + logi_dets, _, _ = self._detect_logi() + if logi_dets: + first_det = True + break + time.sleep(0.1) + + if not first_det: + self.get_logger().warn( + f'[SVC] Object {obj_idx} not visible in Logitech — skipping') + continue + + # IBVS — pass a dummy object with is_cancel_requested=False + class _DummyHandle: + is_cancel_requested = False + def publish_feedback(self, _): pass + + class _DummyFeedback: + current_step = '' + current_object = 0 + ibvs_error_px = 0.0 + + centred = self._ibvs(_DummyHandle(), pan_c, tilt_c, + _DummyFeedback(), obj_idx) + if not centred: + self.get_logger().warn( + f'[SVC] Object {obj_idx} IBVS did not converge — skipping') + continue + + # Capture + self._pub_status('CAPTURING') + time.sleep(self.FOCUS_WAIT) + + paths = self._capture( + self.IMAGES_PER_OBJ, obj_id=instance_num, + session_ts=session_ts, cls_name=cls_name, + conf_score=conf, + ibvs_err=self._ibvs_err, + ibvs_iter=self._ibvs_iter, + ibvs_converged=centred, + location_label=location_label) + + ov_paths = self._capture_insta_overview( + session_ts, location_label, count=1, + folder='inspection', obj_cls=cls_name, obj_id=instance_num) + + obj_paths = paths + ov_paths + all_paths.extend(obj_paths) + self._mqtt(obj_paths, instance_num, + session_ts=session_ts, cls_name=cls_name) + n_inspected += 1 + info_parts.append( + f'{cls_name}#{instance_num}(conf={conf:.2f})') + self.get_logger().info( + f'[SVC] {cls_name}#{instance_num} done — {len(obj_paths)} images') + + self._goal_active = False + self._pub_status('IDLE') + if ret_home: + self._home() + + response.success = n_inspected > 0 + response.status = 'ok' if n_inspected > 0 else 'ibvs_timeout' + response.objects_found = total_found + response.objects_inspected = n_inspected + response.object_in_back = len(back_dets) > 0 + response.image_paths = [str(p) for p in all_paths] + response.info = ( + ', '.join(info_parts) + f' — {len(all_paths)} images saved' + if info_parts else 'no objects successfully inspected') + self.get_logger().info( + f'[SVC] Done: {n_inspected}/{len(front_dets)} inspected, ' + f'{len(all_paths)} images. Back={len(back_dets)}') + return response + + # ------------------------------------------------------------------ + # Overview-only (unknown / main_cylinder) + # ------------------------------------------------------------------ + + def _do_overview(self, request, response, session_ts, label, ret_home): + self._pub_status('OVERVIEW') + self._home() + time.sleep(2.0) + + overview_count = 2 + insta_paths = self._capture_insta_overview( + session_ts, label, count=overview_count, folder='overview') + logi_paths = self._capture( + n=1, obj_id=1, session_ts=session_ts, + cls_name=label, conf_score=0.0, + ibvs_converged=False, ibvs_err=0.0, ibvs_iter=0, + location_label=request.location_label or '') + + all_paths = insta_paths + logi_paths + self._mqtt(all_paths, 0, session_ts=session_ts, cls_name=label) + + self._pub_status('IDLE') + self._goal_active = False + if ret_home: + self._home() + + response.success = len(all_paths) > 0 + response.status = 'ok' if all_paths else 'no_frames' + response.objects_found = len(all_paths) + response.objects_inspected = 0 + response.object_in_back = False + response.image_paths = [str(p) for p in all_paths] + response.info = ( + f'overview-only ({label}): ' + f'{len(insta_paths)} Insta360 + {len(logi_paths)} Logitech') + return response + + # ------------------------------------------------------------------ + # Service-compatible versions of search/sweep (no goal_handle) + # ------------------------------------------------------------------ + + def _search_insta_svc(self): + """Like _search_insta but no goal_handle (service context).""" + self._mode = 'DETECTING' + timeout = (self.GAUGE_INSTA_TIMEOUT + if self._target_class == 'gauge' + else self.INSTA_SEARCH_TIMEOUT) + self.get_logger().info(f'[SVC] Searching Insta360 up to {timeout}s') + deadline = time.time() + timeout + + while time.time() < deadline: + front, back, _, dbg = self._detect_insta() + logi_f = self._get_logi() + self._publish_debug(dbg, logi_f) + if front or back: + self.get_logger().info( + f'[SVC] Found: {len(front)} front, {len(back)} back') + return front, back + time.sleep(0.1) + + self.get_logger().warn('[SVC] Insta360 search timeout') + return [], [] + + def _gauge_sweep_svc(self, session_ts): + """Smooth sweep for gauge using waypoints.""" + import numpy as np + self._mode = 'SWEEP' + self.get_logger().info('[SVC] Gauge sweep: T50(20→160), T80(160→20), T110(50→130)') + + SPEED = 40.0 # degrees per second + HZ = 30 # servo update rate + CHK = 3 # check detection every N steps + + found_dets = [] + cur_tilt = 90 + cur_pan = 90 + + def smooth_move(t0, p0, t1, p1): + """Move servo smoothly from (t0,p0) to (t1,p1). Returns dets, end_t, end_p.""" + steps = max(int(max(abs(t1-t0), abs(p1-p0)) / SPEED * HZ), 1) + for i, (t, p) in enumerate(zip( + np.linspace(t0, t1, steps+1), + np.linspace(p0, p1, steps+1) + )): + self._servo(float(t), float(p)) + if i % CHK == 0: + dets, _, dbg = self._detect_logi() + self._publish_debug(self._get_insta(), dbg) + if dets: + return dets, float(t), float(p) + time.sleep(1.0 / HZ) + return [], float(t1), float(p1) + + waypoints = [ + (50, 20), + (50, 160), + (80, 160), + (80, 20), + (110, 50), + (110, 130) + ] + + for wp_tilt, wp_pan in waypoints: + res, cur_tilt, cur_pan = smooth_move(cur_tilt, cur_pan, wp_tilt, wp_pan) + if res: + found_dets = res + break + + # Final check at waypoint + dets, _, dbg = self._detect_logi() + self._publish_debug(self._get_insta(), dbg) + if dets: + found_dets = dets + break + + if not found_dets: + self.get_logger().warn('[SVC] Gauge sweep: not found') + return [] + + self.get_logger().info(f'[SVC] Gauge found in sweep — starting IBVS') + self._pub_status('IBVS') + + class _DH: + is_cancel_requested = False + def publish_feedback(self, _): pass + + class _FB: + current_step = 'ibvs' + current_object = 1 + ibvs_error_px = 0.0 + + centred = self._ibvs(_DH(), cur_pan, cur_tilt, _FB(), 1) + if not centred: + self.get_logger().warn('[SVC] Sweep IBVS did not converge') + return [] + + self._pub_status('CAPTURING') + time.sleep(self.FOCUS_WAIT) + conf = found_dets[0][6] + paths = self._capture(self.IMAGES_PER_OBJ, obj_id=1, + session_ts=session_ts, cls_name='gauge', + conf_score=conf, + ibvs_err=self._ibvs_err, + ibvs_iter=self._ibvs_iter, + ibvs_converged=centred, + location_label=getattr(self, '_current_location_label', '')) + ov = self._capture_insta_overview(session_ts, 'sweep_gauge', + count=1, folder='inspection', + obj_cls='gauge', obj_id=1) + all_p = paths + ov + self._mqtt(all_p, 1, session_ts=session_ts, cls_name='gauge') + return all_p + + + +# --------------------------------------------------------------------------- + +def main(args=None): + rclpy.init(args=args) + node = InspectionService() + executor = rclpy.executors.MultiThreadedExecutor() + executor.add_node(node) + try: + executor.spin() + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/integrated_pipeline.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/integrated_pipeline.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/docs/OPTIMIZATION_PLAN.md b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/docs/OPTIMIZATION_PLAN.md new file mode 100644 index 00000000..fe415e0a --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/docs/OPTIMIZATION_PLAN.md @@ -0,0 +1,437 @@ +# IBVS Pipeline Optimization Plan +## Visual Inspection System — Jetson Orin Nano + +--- + +## Current Status + +| Metric | Current | Target | +|--------|---------|--------| +| Pipeline FPS | ~3 FPS | ~25+ FPS | +| YOLO backend | PyTorch (CPU) | TensorRT (GPU, FP16) | +| Camera format | YUYV (uncompressed) | MJPEG (compressed → faster) | +| PID location | Python on Jetson | C++ on Arduino | +| IBVS Y-axis error | Bounding box drifts down | Corrected with `cy` offset | + +--- + +## 🔧 Pre-Steps (Run Once Before Any Phase) + +### Enable Jetson Max Performance Mode +Run these on the **Jetson** before any testing: + +```bash +# Set maximum power mode (all CPU + GPU cores ON) +sudo nvpmodel -m 0 + +# Set all cores to maximum frequency +sudo jetson_clocks + +# Verify power mode +sudo nvpmodel -q + +# Optional: Install jtop to monitor GPU/CPU/RAM live +sudo pip install jetson-stats +sudo reboot +jtop # run after reboot +``` + +> ⚠️ These settings reset on reboot. Add to `/etc/rc.local` for permanent effect. + +--- + +## Phase 1 — TensorRT Acceleration (HIGHEST IMPACT) + +### Why +- YOLO currently runs on **CPU** via PyTorch → very slow on Jetson +- TensorRT runs on **GPU** → 3-5x faster on Jetson Orin Nano +- Expected: **3 FPS → 15-25 FPS** (pipeline total) + +### Benchmark Reference (Ultralytics Official — Orin Nano Super, similar to Orin Nano) + +| Format | Inference Time | Speed vs PyTorch | +|--------|---------------|-----------------| +| PyTorch | 13.70 ms/img | 1x (baseline) | +| TorchScript | 13.69 ms/img | ~1x | +| ONNX | 14.47 ms/img | ~1x | +| **TensorRT FP32** | **7.44 ms/img** | **1.8x** | +| **TensorRT FP16** | **4.53 ms/img** | **3x** | +| **TensorRT INT8** | **3.70 ms/img** | **3.7x** | + +### Step 1.1 — Check JetPack Version + +```bash +# On Jetson +cat /etc/nv_tegra_release +# OR +dpkg -l | grep nvidia-jetpack +``` + +Expected output: `JetPack 6.x` (Ubuntu 22.04, Python 3.10) + +### Step 1.2 — Install Required Packages + +```bash +cd ~/Documents/Visual_Inspection_ws +source venv/bin/activate + +# Install ultralytics with export dependencies +pip install "ultralytics[export]" + +# Install correct PyTorch wheel for JetPack 6.1 + ARM64 +# (The default pip torch doesn't support Jetson GPU properly) +pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.5.0a0+872d972e41.nv24.08-cp310-cp310-linux_aarch64.whl +pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.20.0a0+afc54f7-cp310-cp310-linux_aarch64.whl + +# Install cuSPARSELt (fixes torch dependency issue) +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo apt-get update +sudo apt-get -y install libcusparselt0 libcusparselt-dev + +# Install onnxruntime-gpu for ARM64 (JetPack 6, Python 3.10) +pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.23.0-cp310-cp310-linux_aarch64.whl +``` + +### Step 1.3 — Export YOLO to TensorRT + +```bash +cd ~/Documents/Visual_Inspection_ws +source venv/bin/activate + +python3 -c " +from ultralytics import YOLO + +model = YOLO('weights/yolo11n.pt') + +# Export with FP16 (best speed/accuracy balance for Orin Nano) +model.export( + format='engine', + device=0, # GPU 0 + half=True, # FP16 — best for Orin Nano + imgsz=640, + workspace=4 # GB of GPU memory to use +) +print('TensorRT engine exported to weights/yolo11n.engine') +" +``` + +> ⏱️ This takes **10-20 minutes** on first run (TensorRT compilation). +> The `.engine` file is Jetson-specific — do NOT copy it to other machines. + +### Step 1.4 — Verify TensorRT Engine Works + +```bash +python3 -c " +from ultralytics import YOLO +import time + +model = YOLO('weights/yolo11n.engine') + +# Warmup +for _ in range(3): + model('boats.jpg', verbose=False) + +# Benchmark +start = time.time() +for _ in range(30): + model('boats.jpg', verbose=False) +elapsed = time.time() - start + +print(f'TensorRT FPS: {30/elapsed:.1f}') +print(f'Per frame: {elapsed/30*1000:.1f} ms') +" +``` + +### Step 1.5 — Update Pipeline to Use TensorRT + +In `jetson_workspace/ibvs_pipeline.py`, change: +```python +# Change this line in Config class: +YOLO_MODEL = "weights/yolo11n.pt" +# TO: +YOLO_MODEL = "weights/yolo11n.engine" +``` + +The pipeline automatically uses GPU when `.engine` is loaded — no other changes needed. + +### Step 1.6 — Test and Push + +```bash +# On Jetson +python3 ibvs_pipeline.py --headless +# Verify: [FPS] should now show 15-25+ + +# On laptop — push to GitHub +cd /home/dinethra/Jetson_orin_nano +git add jetson_workspace/ibvs_pipeline.py +git commit -m "Phase 1: Switch to TensorRT FP16 engine for YOLO inference" +git push +``` + +--- + +## Phase 2 — Camera Optimization (MJPEG + Threading) + +### Why +- Default camera format is **YUYV** (raw uncompressed): 640×480 × 2 bytes = 614KB per frame USB transfer +- **MJPEG** mode: ~30-50KB per frame (10x less USB bandwidth) +- **Threading**: camera reads happen in background, pipeline never waits for camera + +### Expected Improvement: +3-8 FPS + +### Step 2.1 — Add MJPEG Mode to Camera Open + +In `jetson_workspace/ibvs_pipeline.py`, update camera initialization: + +```python +def open_camera_mjpeg(idx, width, height): + """Open camera in MJPEG mode for faster USB transfer""" + cap = cv2.VideoCapture(idx) + # Force MJPEG format (much faster than YUYV) + cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M','J','P','G')) + cap.set(cv2.CAP_PROP_FRAME_WIDTH, width) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height) + cap.set(cv2.CAP_PROP_FPS, 30) + cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) # Minimum buffer = always fresh frame + return cap +``` + +### Step 2.2 — Add Camera Thread Class + +```python +import threading + +class CameraReader: + """Non-blocking camera reader — always has latest frame ready""" + def __init__(self, idx, width, height): + self.cap = open_camera_mjpeg(idx, width, height) + self.frame = None + self.ret = False + self.lock = threading.Lock() + self._running = True + self.thread = threading.Thread(target=self._read_loop, daemon=True) + self.thread.start() + + def _read_loop(self): + while self._running: + ret, frame = self.cap.read() + with self.lock: + self.ret = ret + self.frame = frame + + def read(self): + with self.lock: + return self.ret, self.frame.copy() if self.frame is not None else None + + def release(self): + self._running = False + self.cap.release() +``` + +### Step 2.3 — Test and Push + +```bash +git add jetson_workspace/ibvs_pipeline.py +git commit -m "Phase 2: MJPEG camera mode + threaded capture for lower latency" +git push +``` + +--- + +## Phase 3 — Arduino PID Offload (Lower Servo Latency) + +### Why +- Currently: Python calculates PID → sends angle to Arduino → Arduino moves servo +- Python serial roundtrip adds 10-50ms delay per servo command +- **Better**: Jetson sends pixel error `(ex, ey)` → Arduino does PID in C++ → moves servo directly +- C++ PID loop can run at 1000Hz vs Python's ~30Hz + +### Architecture Change + +``` +BEFORE: +Jetson (Python) → YOLO → PID calc → Serial angle (90,95) → Arduino → Servo + +AFTER: +Jetson (Python) → YOLO → Serial error (ex=15,ey=-8) → Arduino (C++ PID) → Servo +``` + +### Step 3.1 — Update Arduino Sketch + +In `jetson_workspace/arduino/pan_tilt_control.ino`, add PID loop: + +```cpp +// Receive: "ex,ey\n" (pixel errors from Jetson) +// Arduino does PID internally and moves servos + +float kp = 0.12, ki = 0.002, kd = 0.02; +float integral_pan = 0, integral_tilt = 0; +float prev_err_pan = 0, prev_err_tilt = 0; +unsigned long last_time = 0; + +void loop() { + if (Serial.available()) { + String data = Serial.readStringUntil('\n'); + int comma = data.indexOf(','); + if (comma > 0) { + float ex = data.substring(0, comma).toFloat(); + float ey = data.substring(comma+1).toFloat(); + + unsigned long now = millis(); + float dt = (now - last_time) / 1000.0; + last_time = now; + + // PAN PID + integral_pan += ex * dt; + integral_pan = constrain(integral_pan, -50, 50); + float d_pan = (ex - prev_err_pan) / dt; + float delta_pan = -(kp*ex + ki*integral_pan + kd*d_pan); + prev_err_pan = ex; + + // TILT PID + integral_tilt += ey * dt; + integral_tilt = constrain(integral_tilt, -50, 50); + float d_tilt = (ey - prev_err_tilt) / dt; + float delta_tilt = -(kp*ey + ki*integral_tilt + kd*d_tilt); + prev_err_tilt = ey; + + // Apply corrections + pan_angle = constrain(pan_angle + delta_pan, 0, 180); + tilt_angle = constrain(tilt_angle + delta_tilt, 20, 160); + + pan_servo.write(pan_angle); + tilt_servo.write(180 - tilt_angle); + } + } +} +``` + +### Step 3.2 — Update Python Pipeline (Send error instead of angle) + +```python +# In FINE stage, instead of: +cmd = f"{inverted_tilt},{new_pan}\n" + +# Send pixel errors: +if arduino: + cmd = f"{int(e_x)},{int(e_y)}\n" + arduino.write(cmd.encode()) +``` + +### Step 3.3 — Upload to Arduino and Test + +```bash +# Upload new sketch via Arduino IDE on dev machine +# OR use arduino-cli on Jetson if available +arduino-cli compile --fqbn arduino:avr:uno jetson_workspace/arduino/pan_tilt_control/ +arduino-cli upload -p /dev/ttyACM0 --fqbn arduino:avr:uno jetson_workspace/arduino/pan_tilt_control/ +``` + +### Step 3.4 — Test and Push + +```bash +git add jetson_workspace/arduino/ jetson_workspace/ibvs_pipeline.py +git commit -m "Phase 3: Arduino PID offload — Jetson sends error, Arduino does C++ PID" +git push +``` + +--- + +## Phase 4 — IBVS Bug Fixes and Tuning + +### Bug 1: YOLO Bounding Box Shifted Down in Y-axis + +**Root cause**: The camera `cy` (optical center Y) may not match the actual principal point, +OR the camera is mounted slightly tilted downward. + +**Fix**: Add a Y-axis correction offset to the IBVS target: + +```python +# In Config class, add: +IBVS_Y_OFFSET = 20 # pixels — positive shifts target UP, tune this value + +# In IBVSController.compute_error(): +def compute_error(self, detected_point): + cx_detected, cy_detected = detected_point + e_x = cx_detected - self.cx + e_y = cy_detected - (self.cy - self.config.IBVS_Y_OFFSET) # apply offset + return e_x, e_y +``` + +**Tune `IBVS_Y_OFFSET`**: +- Run pipeline +- If box still below center → increase offset +- If box above center → decrease offset + +### Bug 2: Slow IBVS Convergence (Arrow Too Large) + +**Fix**: Allow simultaneous pan+tilt when error is large (remove sequential-only restriction for coarse errors): + +```python +# In compute_servo_correction(), change: +PAN_THRESHOLD = 2.0 # degrees + +# Allow tilt to move even when pan is large (just slower) +if abs(error_pan) > PAN_THRESHOLD: + error_tilt *= 0.3 # Allow 30% tilt correction while panning + # Don't fully freeze tilt +``` + +**Also increase KP for faster initial response**: +```python +IBVS_KP_PAN = 0.18 # was 0.12 — faster response +IBVS_KP_TILT = 0.18 # was 0.12 +``` + +### Bug 3: YOLO Only on Relevant Camera per Stage + +**Optimization**: In COARSE stage, only run YOLO on Insta360. In FINE stage, only run on Logitech. +This halves the YOLO inference load: + +```python +# Already done in pipeline — verify this is implemented correctly +if state == "COARSE": + results = model(frame_insta, ...) # ← only Insta360 +elif state == "FINE": + results = model(frame_logi, ...) # ← only Logitech +``` + +### Step 4 — Test and Push + +```bash +git add jetson_workspace/ibvs_pipeline.py +git commit -m "Phase 4: Fix Y-axis IBVS offset, tune PID gains for faster convergence" +git push +``` + +--- + +## Summary Timeline + +``` +Week 1: Phase 1 (TensorRT) → 3 FPS → ~20 FPS +Week 1: Phase 2 (Camera) → +3-5 FPS additional +Week 2: Phase 3 (Arduino) → Lower servo latency, same FPS but smoother +Week 2: Phase 4 (IBVS fixes)→ Accurate centering, no Y-axis drift +``` + +## Final Expected Performance + +| Metric | Before | After All Phases | +|--------|--------|-----------------| +| FPS | ~3 | ~25-30 | +| Servo latency | 50ms (Python) | ~5ms (Arduino C++) | +| Y-axis drift | Yes | Fixed | +| IBVS convergence | Slow (sequential) | Faster (simultaneous) | +| YOLO device | CPU | GPU (TensorRT FP16) | + +--- + +## Notes + +- Each phase is **independent** — implement and test one at a time +- Always test on Jetson before pushing to GitHub +- The TensorRT `.engine` file is **Jetson-specific** — do not commit it to GitHub + (add `*.engine` to `.gitignore`) +- Reference: [Ultralytics YOLO on NVIDIA Jetson](https://docs.ultralytics.com/guides/nvidia-jetson/) diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/ibvs_pipeline.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/ibvs_pipeline.py index 879558d6..4f19c0dc 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/ibvs_pipeline.py +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/ibvs_pipeline.py @@ -419,7 +419,7 @@ def compute_servo_correction(self, e_x, e_y, dt=0.033): d_tilt = self.kd_tilt * (error_tilt - self.prev_error_tilt) / dt self.prev_error_tilt = error_tilt - delta_tilt = -(p_tilt + i_tilt + d_tilt) + delta_tilt = (p_tilt + i_tilt + d_tilt) # +ve: tilt up when object above centre # Velocity limiting for tilt MAX_SPEED_TILT = 3.0 # balanced speed diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/run.sh b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/run.sh old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/04_test_arduino_serial.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/04_test_arduino_serial.py new file mode 100644 index 00000000..d374c83c --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/04_test_arduino_serial.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +""" +Arduino Serial Test — send commands and see responses. +Usage: + python3 04_test_arduino_serial.py + +Command format the Arduino expects: + tilt,pan\n e.g. 90,90\n (tilt=90, pan=90 = center) +""" + +import serial +import glob +import os +import time + +# ── Find Arduino ───────────────────────────────────────────────────────────── +def find_arduino(): + ARDUINO_VIDS = {'2341', '1a86', '0403'} + # Try udev symlink first + if os.path.exists('/dev/arduino'): + print("✅ Found via udev symlink: /dev/arduino") + return '/dev/arduino' + # Scan all serial ports + ports = sorted(glob.glob('/dev/ttyACM*') + glob.glob('/dev/ttyUSB*')) + for port in ports: + try: + dev_name = os.path.basename(port) + check = os.path.realpath(f'/sys/class/tty/{dev_name}') + for _ in range(8): + vid_file = os.path.join(check, 'idVendor') + if os.path.exists(vid_file): + with open(vid_file) as f: + vid = f.read().strip() + if vid in ARDUINO_VIDS: + print(f"✅ Found Arduino at {port} (VID={vid})") + return port + break + check = os.path.dirname(check) + except: + pass + return None + +# ── Main ────────────────────────────────────────────────────────────────────── +port = find_arduino() +if not port: + print("❌ Arduino not found! Check USB connection.") + exit(1) + +try: + arduino = serial.Serial(port, 9600, timeout=1) + time.sleep(2) # wait for Arduino to reset + print(f"\n🔌 Connected to Arduino at {port} (9600 baud)") + print("=" * 50) + print("Commands:") + print(" tilt,pan → e.g. 90,90 (center)") + print(" tilt,pan → e.g. 70,120 (tilt=70, pan=120)") + print(" q → quit") + print(" center → send 90,90 (center both)") + print("=" * 50) + print() + + while True: + try: + cmd = input("Send > ").strip() + + if cmd.lower() == 'q': + print("Bye!") + break + + elif cmd.lower() == 'center': + cmd = '90,90' + + elif cmd == '': + continue + + # Send command + full_cmd = cmd + '\n' + arduino.write(full_cmd.encode()) + print(f" ✉ Sent: {cmd}") + + # Read any response from Arduino + time.sleep(0.1) + while arduino.in_waiting: + response = arduino.readline().decode().strip() + if response: + print(f" ← Arduino: {response}") + + except KeyboardInterrupt: + print("\nBye!") + break + +except serial.SerialException as e: + print(f"❌ Serial error: {e}") + print(" Try: sudo chmod 666 /dev/ttyACM0") + +finally: + try: + arduino.close() + except: + pass diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/fix_ch340_permanent.sh b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/fix_ch340_permanent.sh new file mode 100644 index 00000000..39792303 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/fix_ch340_permanent.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# ============================================================ +# PERMANENT CH340 SERIAL FIX FOR JETSON ORIN NANO +# Run ONCE as: sudo bash fix_ch340_permanent.sh +# After this, the PCB/Arduino will ALWAYS be at /dev/arduino +# on every boot — no manual modprobe needed ever again. +# ============================================================ + +set -e +echo "" +echo "╔══════════════════════════════════════════════╗" +echo "║ CH340 PERMANENT FIX — JETSON ORIN NANO ║" +echo "╚══════════════════════════════════════════════╝" +echo "" + +# ── Step 1: Check if ch34x module is already installed ────────────────────── +echo "▶ Step 1: Checking ch34x module..." +if find /lib/modules/$(uname -r) -name "ch34x.ko*" 2>/dev/null | grep -q ch34x; then + echo " ✅ ch34x module already installed" +else + echo " ⚠️ ch34x not installed — building from source..." + + # Check if CH341SER source exists + if [ ! -d "/home/$SUDO_USER/CH341SER" ]; then + echo " 📥 Cloning CH341SER..." + cd /home/$SUDO_USER + sudo -u $SUDO_USER git clone https://github.com/juliagoda/CH341SER.git + fi + + cd /home/$SUDO_USER/CH341SER + + # Install kernel headers if needed + if [ ! -d "/usr/src/linux-headers-$(uname -r)" ]; then + echo " 📦 Installing kernel headers..." + apt-get install -y nvidia-l4t-kernel-headers 2>/dev/null || \ + apt-get install -y linux-headers-$(uname -r) 2>/dev/null || \ + echo " ⚠️ Headers not found via apt — trying to build anyway..." + fi + + echo " 🔨 Building ch34x driver..." + sudo -u $SUDO_USER make clean 2>/dev/null || true + sudo -u $SUDO_USER make + + echo " 📦 Installing ch34x driver permanently..." + make install + depmod -a + + echo " ✅ ch34x driver installed" +fi + +# ── Step 2: Auto-load ch34x on every boot ─────────────────────────────────── +echo "" +echo "▶ Step 2: Setting ch34x to auto-load on boot..." +echo 'ch34x' > /etc/modules-load.d/ch34x.conf +echo " ✅ Created /etc/modules-load.d/ch34x.conf" + +# ── Step 3: Disable brltty (steals CH340 devices) ─────────────────────────── +echo "" +echo "▶ Step 3: Disabling brltty (the CH340 thief)..." +systemctl stop brltty 2>/dev/null || true +systemctl disable brltty 2>/dev/null || true +systemctl mask brltty 2>/dev/null || true +# Also mask brltty-udev if it exists +systemctl mask brltty-udev 2>/dev/null || true +apt-get remove -y brltty 2>/dev/null || true +echo " ✅ brltty disabled and removed" + +# ── Step 4: Permanent udev rule → /dev/arduino symlink ────────────────────── +echo "" +echo "▶ Step 4: Creating permanent udev rule..." +cat > /etc/udev/rules.d/99-ch340-arduino.rules << 'EOF' +# CH340 USB Serial Converter (PCB / Arduino clone) +# Always creates /dev/arduino symlink with open permissions +SUBSYSTEM=="tty", ATTRS{idVendor}=="1a86", ATTRS{idProduct}=="7523", \ + SYMLINK+="arduino", MODE="0666", \ + TAG+="systemd" +EOF +echo " ✅ Created /etc/udev/rules.d/99-ch340-arduino.rules" + +# ── Step 5: Reload udev rules ──────────────────────────────────────────────── +echo "" +echo "▶ Step 5: Reloading udev rules..." +udevadm control --reload-rules +udevadm trigger +echo " ✅ udev rules reloaded" + +# ── Step 6: Load ch34x NOW (no reboot needed for this session) ─────────────── +echo "" +echo "▶ Step 6: Loading ch34x driver now..." +modprobe ch34x 2>/dev/null || insmod /home/$SUDO_USER/CH341SER/ch34x.ko 2>/dev/null || true +sleep 1 +udevadm trigger +sleep 1 + +# ── Step 7: Verify ─────────────────────────────────────────────────────────── +echo "" +echo "▶ Step 7: Verifying..." +if ls /dev/ttyUSB* 2>/dev/null | head -1; then + echo " ✅ /dev/ttyUSB* detected" +else + echo " ⚠️ No /dev/ttyUSB* yet — replug the USB cable" +fi + +if ls -la /dev/arduino 2>/dev/null; then + echo " ✅ /dev/arduino symlink ready" +else + echo " ⚠️ /dev/arduino not yet — replug USB cable once" +fi + +echo "" +echo "╔══════════════════════════════════════════════╗" +echo "║ ✅ ALL DONE! ║" +echo "║ ║" +echo "║ Replug USB cable once, then: ║" +echo "║ ls /dev/arduino → should appear ║" +echo "║ python3 wasd_servo_control.py ║" +echo "║ ║" +echo "║ After every reboot: /dev/arduino is READY ║" +echo "║ No modprobe, no chmod, nothing! ║" +echo "╚══════════════════════════════════════════════╝" +echo "" diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/wasd_servo_control.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/wasd_servo_control.py new file mode 100644 index 00000000..fd069973 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/jetson_workspace/tests/wasd_servo_control.py @@ -0,0 +1,304 @@ +#!/usr/bin/env python3 +""" +Pan-Tilt Servo Controller — Gamepad (F710) + WASD Keyboard +============================================================ +Prefers Logitech F710 gamepad for TRUE simultaneous analog control. +Falls back to WASD keyboard if no gamepad found. + +GAMEPAD (recommended): + Left stick = Pan (X axis) + Tilt (Y axis) simultaneously + Right bumper (RB) = increase speed + Start/B = Center servos + Analog control = gentle push → slow, full push → fast + +WASD KEYBOARD (fallback): + W/S = Tilt A/D = Pan R = Center Q = Quit + + DIAGONAL SINGLE-KEY SHORTCUTS (always work!): + Q = W+A (Tilt UP + Pan LEFT ) + E = W+D (Tilt UP + Pan RIGHT) + Z = A+S (Pan LEFT + Tilt DOWN) + X = S+D (Tilt DOWN + Pan RIGHT) +""" + +import serial, glob, os, sys, time, tty, termios, select, threading, struct + +# ── Config ──────────────────────────────────────────────────────────────────── +BAUD = 9600 +PAN_MIN, PAN_MAX = 0, 180 +TILT_MIN, TILT_MAX = 20, 160 +LOOP_HZ = 30 + +# Gamepad +AXIS_DEADZONE = 0.08 # ignore stick input below this +GAMEPAD_SPEED = 45.0 # max deg/s for gamepad + +# Keyboard +KB_VEL_MAX = 60.0 +KB_ACCEL = 55.0 +KB_DECEL = 90.0 +KB_HOLD_WINDOW = 0.30 # seconds — key stays active after last press + +# ── Port Detection ──────────────────────────────────────────────────────────── +def find_port(): + if os.path.exists('/dev/arduino'): return '/dev/arduino' + for port in sorted(glob.glob('/dev/ttyUSB*') + glob.glob('/dev/ttyACM*')): + try: + check = os.path.realpath(f'/sys/class/tty/{os.path.basename(port)}') + for _ in range(8): + vf = os.path.join(check, 'idVendor') + if os.path.exists(vf): + if open(vf).read().strip() in {'1a86','2341','0403'}: return port + break + check = os.path.dirname(check) + except: pass + ports = sorted(glob.glob('/dev/ttyUSB*') + glob.glob('/dev/ttyACM*')) + return ports[0] if ports else None + +# ── Gamepad Detection ───────────────────────────────────────────────────────── +def find_gamepad(): + """Find joystick device — tries /dev/input/js* """ + # Look for F710 specifically + try: + for js in sorted(glob.glob('/dev/input/js*')): + if os.access(js, os.R_OK): + return js + except: pass + return None + +# ── Display ─────────────────────────────────────────────────────────────────── +def draw(pan, tilt, vp, vt, mode, extra, msg): + B = 28 + def b(v,lo,hi): p=max(0,min(B,int(((v-lo)/(hi-lo))*B))); return '['+('─'*p)+'●'+('─'*(B-p))+']' + def vb(v,mx): p=max(0,min(B,int((v/mx)*B))); return '['+('█'*p)+('░'*(B-p))+']' + spd = max(abs(vp), abs(vt)) + mx = GAMEPAD_SPEED if mode=='GAMEPAD' else KB_VEL_MAX + print('\033[H\033[J', end='') + print("╔══════════════════════════════════════╗") + print(f"║ 🎮 PAN-TILT [{mode:<10}] ║") + print("╠══════════════════════════════════════╣") + if mode == 'GAMEPAD': + print("║ Left Stick = Pan + Tilt (both axes) ║") + print("║ Start/B = Center Trig=Speed boost ║") + else: + print("║ W↑ Tilt S↓ Tilt ║") + print("║ A← Pan D→ Pan R=Center Q=Quit ║") + print("╠══════════════════════════════════════╣") + print(f"║ PAN :{pan:>6.1f}° {b(pan, PAN_MIN,PAN_MAX)} ║") + print(f"║ TILT :{tilt:>6.1f}° {b(tilt,TILT_MIN,TILT_MAX)} ║") + print(f"║ SPEED:{spd:>5.0f}°/s {vb(spd,mx)} ║") + print(f"║ {'INPUT: '+extra:<36} ║") + print("╠══════════════════════════════════════╣") + print(f"║ {(msg or '')[:36]:<36} ║") + print("╚══════════════════════════════════════╝") + +# ── Serial Send ─────────────────────────────────────────────────────────────── +def send(ser, pan, tilt): + ser.write(f"{180-int(round(tilt))},{int(round(pan))}\n".encode()) + +# ── Gamepad Control Loop ────────────────────────────────────────────────────── +def run_gamepad(ser, js_path): + """ + Read joystick events (Linux js protocol: 8-byte structs). + Format: 4B time | 2B value | 1B type | 1B number + type 1 = button, type 2 = axis + F710 axes: 0=LX, 1=LY, 2=LT, 3=RX, 4=RY, 5=RT + Buttons: 0=A,1=B,2=X,3=Y,4=LB,5=RB,6=LT,7=RT,8=Back,9=Start + """ + pan, tilt = 90.0, 90.0 + vp = vt = 0.0 + axes = {} + msg = 'Left stick for pan+tilt simultaneously!' + dt = 1.0 / LOOP_HZ + + send(ser, pan, tilt) + + try: + js = open(js_path, 'rb') + except PermissionError: + print(f"❌ Permission denied: {js_path}") + print(f" Fix: sudo chmod a+r {js_path} or sudo usermod -a -G input $USER") + return False + + print(f"✅ Gamepad: {js_path}") + print(" Left stick = Pan + Tilt | Start/B = Center") + time.sleep(0.5) + + try: + while True: + t0 = time.time() + + # Read all pending events + while True: + r, _, _ = select.select([js], [], [], 0) + if not r: break + data = js.read(8) + if len(data) < 8: break + _, value, etype, number = struct.unpack('IhBB', data) + etype &= ~0x80 # strip init flag + + if etype == 2: # axis + axes[number] = value / 32767.0 + + if etype == 1 and value == 1: # button press + if number in (0, 1, 9): # A, B, Start = center + pan, tilt = 90.0, 90.0 + vp = vt = 0.0 + msg = '🏠 Center (90,90)' + send(ser, pan, tilt) + if number == 7: # RT button = quit + return True + + # Apply axes + lx = axes.get(0, 0.0) # left stick X → pan + ly = axes.get(1, 0.0) # left stick Y → tilt + + # Deadzone + if abs(lx) < AXIS_DEADZONE: lx = 0.0 + if abs(ly) < AXIS_DEADZONE: ly = 0.0 + + # Normalize (input is -1 to +1, apply speed) + vp = lx * GAMEPAD_SPEED # pan velocity + vt = -ly * GAMEPAD_SPEED # tilt velocity (invert Y: push up = tilt up) + + if lx != 0.0 or ly != 0.0: + pan = max(PAN_MIN, min(PAN_MAX, pan + vp * dt)) + tilt = max(TILT_MIN, min(TILT_MAX, tilt + vt * dt)) + msg = f'⬅➡P={pan:.0f}° ⬆⬇T={tilt:.0f}° ({lx:+.2f},{ly:+.2f})' + send(ser, pan, tilt) + + draw(pan, tilt, vp, vt, 'GAMEPAD', + f'LX={lx:+.2f} LY={ly:+.2f}', msg) + + elapsed = time.time() - t0 + time.sleep(max(0, dt - elapsed)) + + except KeyboardInterrupt: + pass + finally: + js.close() + send(ser, 90.0, 90.0) + + return True + +# ── Keyboard Control Loop ───────────────────────────────────────────────────── +def run_keyboard(ser): + pan, tilt = 90.0, 90.0 + vp = vt = 0.0 + stamps = {} + msg = 'Hold W then D together for diagonal!' + dt = 1.0 / LOOP_HZ + + send(ser, pan, tilt) + + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + tty.setraw(fd) + + try: + while True: + t0 = time.time() + + while True: + r,_,_ = select.select([sys.stdin],[],[],0) + if not r: break + ch = os.read(fd,1).decode('utf-8',errors='ignore') + if ch == '\x1b': + r2,_,_ = select.select([sys.stdin],[],[],0.02) + if r2: + os.read(fd,1) + r3,_,_ = select.select([sys.stdin],[],[],0.02) + ch = {'A':'w','B':'s','C':'d','D':'a'}.get( + os.read(fd,1).decode('utf-8',errors='ignore') if r3 else '', '') if r2 else 'q' + else: ch='q' + k = ch.lower() + if k == '\x03': raise KeyboardInterrupt + if k == 'r': + pan=tilt=90.0; vp=vt=0.0; stamps.clear() + msg='🏠 Center (90,90)'; send(ser, pan, tilt) + elif k in ('w','a','s','d'): + now = time.time() + stamps[k] = now + # Cross-refresh: if other axis was recently active, keep it alive + # This makes W+D held together work — each key refreshes the other + if k in ('a', 'd'): # pan key → refresh tilt if active + for tk in ('w', 's'): + if tk in stamps and now - stamps[tk] < KB_HOLD_WINDOW: + stamps[tk] = now + elif k in ('w', 's'): # tilt key → refresh pan if active + for pk in ('a', 'd'): + if pk in stamps and now - stamps[pk] < KB_HOLD_WINDOW: + stamps[pk] = now + + # ── Diagonal single-key shortcuts ────────────── + elif k == 'q': # W+A → tilt up + pan left + stamps['w'] = stamps['a'] = time.time() + elif k == 'e': # W+D → tilt up + pan right + stamps['w'] = stamps['d'] = time.time() + elif k == 'z': # A+S → pan left + tilt down + stamps['a'] = stamps['s'] = time.time() + elif k == 'x': # S+D → tilt down + pan right + stamps['s'] = stamps['d'] = time.time() + + now = time.time() + active = {k for k,t in stamps.items() if now-t < KB_HOLD_WINDOW} + + mt = 'w' in active or 's' in active + mp = 'a' in active or 'd' in active + vt = min(KB_VEL_MAX, vt+KB_ACCEL*dt) if mt else max(0.0, vt-KB_DECEL*dt) + vp = min(KB_VEL_MAX, vp+KB_ACCEL*dt) if mp else max(0.0, vp-KB_DECEL*dt) + + parts, chg = [], False + if 'w' in active and vt>0: + tilt=min(TILT_MAX,tilt+vt*dt); parts.append(f'⬆T={tilt:.0f}°'); chg=True + elif 's' in active and vt>0: + tilt=max(TILT_MIN,tilt-vt*dt); parts.append(f'⬇T={tilt:.0f}°'); chg=True + if 'a' in active and vp>0: + pan=min(PAN_MAX, pan+vp*dt); parts.append(f'⬅P={pan:.0f}°'); chg=True + elif 'd' in active and vp>0: + pan=max(PAN_MIN, pan-vp*dt); parts.append(f'➡P={pan:.0f}°'); chg=True + + if parts: msg = ' '.join(parts)+f' {max(vp,vt):.0f}°/s' + if chg or vp>0 or vt>0: send(ser, pan, tilt) + + ks = '+'.join(sorted(active&{'w','a','s','d'})).upper() or 'none' + draw(pan, tilt, vp, vt, 'KEYBOARD', f'keys:[{ks}]', msg) + time.sleep(max(0, dt-(time.time()-t0))) + + except KeyboardInterrupt: + pass + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old) + send(ser, 90.0, 90.0) + +# ── Main ────────────────────────────────────────────────────────────────────── +def main(): + print("🔍 Finding servo controller...") + port = find_port() + if not port: + print("❌ No serial port found."); sys.exit(1) + if not os.access(port, os.R_OK|os.W_OK): + os.system(f'sudo chmod 666 {port}') + try: + ser = serial.Serial(port, BAUD, timeout=1) + time.sleep(2) + print(f"✅ Serial: {port}") + except Exception as e: + print(f"❌ {e}"); sys.exit(1) + + js = find_gamepad() + if js: + print(f"🎮 Gamepad found: {js} — using analog control!") + print(" (For keyboard instead: run with --keyboard flag)") + if '--keyboard' not in sys.argv: + run_gamepad(ser, js) + ser.close() + return + + print("⌨️ No gamepad found — using WASD keyboard") + run_keyboard(ser) + ser.close() + print("\n✅ Done!") + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/single_camera_pipeline.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/single_camera_pipeline.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_full_pipeline.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_full_pipeline.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/00_check_devices.sh b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/00_check_devices.sh old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/01_test_cameras.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/01_test_cameras.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/02_yolo_detection_auto.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/02_yolo_detection_auto.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/02_yolo_detection_cpu.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/02_yolo_detection_cpu.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/03_export_to_tensorrt.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/03_export_to_tensorrt.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/04_yolo_detection_tensorrt.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/04_yolo_detection_tensorrt.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/05_hemisphere_mapping.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/05_hemisphere_mapping.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/05_yolo_detection_onnx_gpu.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/05_yolo_detection_onnx_gpu.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/06_arduino_test.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/06_arduino_test.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/07_detection_confirmation.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/07_detection_confirmation.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_bt_full_flow.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_bt_full_flow.py new file mode 100644 index 00000000..0a81e566 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_bt_full_flow.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +""" +test_bt_full_flow.py — Simulates the full Behaviour Tree inspection flow +========================================================================= +This script acts exactly like a BT node would: + + Step 1: Call /visual_inspection/inspect → get image_paths + Step 2: Call /visual_inspection/upload_images → send images+metadata to laptop + +Run this INSTEAD of test_inspection_service.py when you want to test the +full end-to-end pipeline including the HTTP upload to laptop. + +REQUIREMENTS (all must be running first): + Jetson Terminal 1: ros2 run visual_inspection_ros camera_node + Jetson Terminal 2: ros2 run visual_inspection_ros servo_node + Jetson Terminal 3: ros2 run visual_inspection_ros inspection_service + Jetson Terminal 4: ros2 run visual_inspection_ros image_uploader + Laptop: python3 Evaluation_V_I_ws/laptop_receiver.py + +USAGE: + python3 test_bt_full_flow.py --object gauge --location engine_room_A + python3 test_bt_full_flow.py --object fire_extinguisher --location corridor_B + python3 test_bt_full_flow.py --object door --location engine_room_A +""" + +import argparse +import sys +import rclpy +from rclpy.node import Node +from visual_inspection_interfaces.srv import Inspect, UploadImages + + +# ── ANSI colours ────────────────────────────────────────────────────────────── +G = '\033[92m' # green +R = '\033[91m' # red +Y = '\033[93m' # yellow +B = '\033[94m' # blue +W = '\033[97m' # white +X = '\033[0m' # reset + + +class BTFlowTester(Node): + """Mimics a BT leaf node — inspect then upload.""" + + def __init__(self, target_object: str, location_label: str): + super().__init__('bt_flow_tester') + self.target_object = target_object + self.location_label = location_label + + self.inspect_client = self.create_client(Inspect, '/visual_inspection/inspect') + self.upload_client = self.create_client(UploadImages, '/visual_inspection/upload_images') + + # ────────────────────────────────────────────────────────────────────────── + def wait_for_services(self, timeout_sec=10.0): + print(f'\n{B}[BT] Waiting for services...{X}') + ok_i = self.inspect_client.wait_for_service(timeout_sec=timeout_sec) + ok_u = self.upload_client.wait_for_service(timeout_sec=timeout_sec) + + if not ok_i: + print(f'{R}[BT] ✗ /visual_inspection/inspect not available{X}') + print(f' Is inspection_service running? (Terminal 3)') + return False + if not ok_u: + print(f'{R}[BT] ✗ /visual_inspection/upload_images not available{X}') + print(f' Is image_uploader running? (Terminal 4)') + return False + + print(f'{G}[BT] ✓ Both services ready{X}') + return True + + # ────────────────────────────────────────────────────────────────────────── + def step1_inspect(self): + """Step 1: Call inspection service (same as BT Inspect leaf node).""" + print(f'\n{W}━━━ STEP 1: Visual Inspection ━━━━━━━━━━━━━━━━━━━━━━━━━━━━{X}') + print(f'{B}[BT] Requesting inspection of "{self.target_object}" at "{self.location_label}"{X}') + + req = Inspect.Request() + req.target_object = self.target_object + req.location_label = self.location_label + req.max_objects = 0 # inspect all found + req.return_home = True + + future = self.inspect_client.call_async(req) + rclpy.spin_until_future_complete(self, future, timeout_sec=120.0) + + if future.result() is None: + print(f'{R}[BT] ✗ Inspect service call failed (timeout or crash){X}') + return None + + res = future.result() + print(f'\n{B}[BT] Inspect response:{X}') + print(f' success : {G if res.success else R}{res.success}{X}') + print(f' status : {res.status}') + print(f' objects_found : {res.objects_found}') + print(f' objects_inspected: {res.objects_inspected}') + print(f' object_in_back : {res.object_in_back}') + print(f' info : {res.info}') + print(f' image_paths ({len(res.image_paths)}):') + for p in res.image_paths: + print(f' {Y}{p}{X}') + + if res.object_in_back: + print(f'\n{Y}[BT] ⚠ Object in back — BT should rotate robot 180° and retry{X}') + return None + + if not res.success or not res.image_paths: + print(f'\n{R}[BT] ✗ Inspection failed or no images — status: {res.status}{X}') + return None + + print(f'\n{G}[BT] ✓ Inspection complete — proceeding to upload{X}') + return list(res.image_paths) + + # ────────────────────────────────────────────────────────────────────────── + def step2_upload(self, image_paths: list): + """Step 2: Call upload service (same as BT Upload leaf node).""" + print(f'\n{W}━━━ STEP 2: Upload Images to Laptop ━━━━━━━━━━━━━━━━━━━━━━{X}') + print(f'{B}[BT] Uploading {len(image_paths)} paths with label="{self.location_label}"{X}') + + req = UploadImages.Request() + req.image_paths = image_paths + req.session_label = self.location_label # ← this becomes the top folder on laptop + + future = self.upload_client.call_async(req) + rclpy.spin_until_future_complete(self, future, timeout_sec=60.0) + + if future.result() is None: + print(f'{R}[BT] ✗ Upload service call failed (timeout or crash){X}') + return False + + res = future.result() + print(f'\n{B}[BT] Upload response:{X}') + print(f' success : {G if res.success else R}{res.success}{X}') + print(f' uploaded_count : {res.uploaded_count}') + print(f' info : {res.info}') + + if res.success: + print(f'\n{G}[BT] ✓ Upload complete — files saved on laptop at:{X}') + print(f' {Y}received_captures/{self.location_label}/...{X}') + else: + print(f'\n{R}[BT] ✗ Upload failed — is laptop_receiver.py running on the laptop?{X}') + print(f' Run on laptop: python3 Evaluation_V_I_ws/laptop_receiver.py') + + return res.success + + # ────────────────────────────────────────────────────────────────────────── + def run(self): + if not self.wait_for_services(): + return False + + # Step 1 — Inspect + image_paths = self.step1_inspect() + if image_paths is None: + return False + + # Step 2 — Upload + upload_ok = self.step2_upload(image_paths) + + # ── Final BT result ──────────────────────────────────────────────── + print(f'\n{W}━━━ BT RESULT ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━{X}') + if upload_ok: + print(f'{G}✓ BT node → SUCCESS{X}') + print(f' Inspection done + images on laptop at: received_captures/{self.location_label}/') + else: + print(f'{R}✗ BT node → FAILURE (upload failed — BT can retry){X}') + return upload_ok + + +# ── Entry point ─────────────────────────────────────────────────────────────── +def main(): + parser = argparse.ArgumentParser( + description='Simulate the full BT flow: inspect → upload to laptop') + parser.add_argument('--object', '-o', + default='gauge', + choices=['gauge', 'fire_extinguisher', 'extinguisher', + 'door', 'person', 'unknown', 'main_cylinder'], + help='Object to inspect (default: gauge)') + parser.add_argument('--location', '-l', + default='test_location', + help='Location label from BT map (e.g. engine_room_A). Used as folder on laptop.') + args = parser.parse_args() + + print(f'\n{W}Visual Inspection — Full BT Flow Test{X}') + print(f'{W}Target: {G}{args.object}{X} Location: {G}{args.location}{X}\n') + + rclpy.init() + node = BTFlowTester( + target_object=args.object, + location_label=args.location + ) + + success = node.run() + node.destroy_node() + rclpy.shutdown() + sys.exit(0 if success else 1) + + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_full_pipeline.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_full_pipeline.py index 8968753f..937b60b7 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_full_pipeline.py +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_full_pipeline.py @@ -14,13 +14,11 @@ source /opt/ros/humble/setup.bash source ~/Documents/Visual_Inspection_ws/inspection_ws/install/setup.bash python3 test_scripts/test_full_pipeline.py + python3 test_scripts/test_full_pipeline.py --object gauge + python3 test_scripts/test_full_pipeline.py --object fire_extinguisher --once - Options (edit constants below): - MAX_OBJECTS - 0 = inspect all, N = first N only - RETURN_HOME - servo returns to 90,90 after each run - AUTO_RETRY - if object_in_back=True, wait RETRY_WAIT then resend goal - RETRY_WAIT - seconds to wait before retry (simulates robot rotate) - RUN_ONCE - True = one run then exit, False = loop forever + Supported objects: fire_extinguisher, door, person, gauge + Leave blank (default) to detect ALL classes. """ import rclpy @@ -31,8 +29,9 @@ import sys import time +import argparse -# ---- Config (edit here) ----------------------------------------------- +# ---- Config (edit here or use CLI args) ------------------------------ MAX_OBJECTS = 0 # 0 = inspect all detected front objects RETURN_HOME = True # servo returns to 90,90 after done AUTO_RETRY = True # auto-retry if object_in_back detected @@ -40,11 +39,15 @@ RUN_ONCE = False # True = one run; False = loop until Ctrl+C # ----------------------------------------------------------------------- +# Supported target objects (must match YOLO class names in ibvs_action_server) +SUPPORTED_OBJECTS = ['fire_extinguisher', 'door', 'person', 'gauge', 'any', ''] + class PipelineTester(Node): - def __init__(self): + def __init__(self, target_object=''): super().__init__('pipeline_tester') + self.target_object = target_object self._client = ActionClient( self, InspectObjects, '/visual_inspection/inspect_objects') self.get_logger().info('Waiting for action server...') @@ -53,13 +56,15 @@ def __init__(self): def run_one(self, run_id=1): """Send one inspection goal and wait for result.""" + obj_label = self.target_object or 'any' print(f'\n{"="*60}') - print(f' RUN {run_id} (max_objects={MAX_OBJECTS} return_home={RETURN_HOME})') + print(f' RUN {run_id} object={obj_label} max_objects={MAX_OBJECTS} return_home={RETURN_HOME}') print(f'{"="*60}') goal = InspectObjects.Goal() - goal.max_objects = MAX_OBJECTS - goal.return_home = RETURN_HOME + goal.max_objects = MAX_OBJECTS + goal.return_home = RETURN_HOME + goal.target_object = self.target_object future = self._client.send_goal_async( goal, feedback_callback=self._feedback_cb) @@ -161,7 +166,25 @@ def run_loop(self): def main(): + parser = argparse.ArgumentParser(description='Visual Inspection Pipeline Test') + parser.add_argument( + '--object', '-o', default='', + help='Target object class to inspect. ' + 'Options: fire_extinguisher, door, person, gauge, any (default: any/all)') + parser.add_argument( + '--once', action='store_true', + help='Run once then exit (overrides RUN_ONCE constant)') + args = parser.parse_args() + + target_object = args.object.strip().lower() + if args.once: + global RUN_ONCE + RUN_ONCE = True + print('Visual Inspection Pipeline Test') + print(f' Target object : {target_object or "any (all classes)"}') + print(f' Run once : {RUN_ONCE}') + print() print(' Make sure these are running:') print(' T1: ros2 run visual_inspection_ros camera_node') print(' T2: ros2 run visual_inspection_ros servo_node') @@ -175,7 +198,7 @@ def main(): print() rclpy.init() - node = PipelineTester() + node = PipelineTester(target_object=target_object) try: node.run_loop() except KeyboardInterrupt: diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_inspection_service.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_inspection_service.py new file mode 100644 index 00000000..ce22b2f2 --- /dev/null +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/test_scripts/test_inspection_service.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +""" +test_inspection_service.py — Test client for the inspection ROS2 service. + +Simulates what the Behaviour Tree sends. + +Usage: + python3 test_inspection_service.py --object fire_extinguisher + python3 test_inspection_service.py --object gauge + python3 test_inspection_service.py --object unknown + python3 test_inspection_service.py --object main_cylinder + python3 test_inspection_service.py --object door + python3 test_inspection_service.py --object person + python3 test_inspection_service.py # detect all classes +""" + +import sys +import argparse +import rclpy +from rclpy.node import Node +from visual_inspection_interfaces.srv import Inspect + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--object', default='', help='Target object class') + parser.add_argument('--location', default='test_loc', help='Location label') + parser.add_argument('--max', default=0, type=int, help='Max objects (0=all)') + parser.add_argument('--no-home', action='store_true', help='Skip return home') + args = parser.parse_args() + + rclpy.init() + node = Node('inspection_test_client') + cli = node.create_client(Inspect, '/visual_inspection/inspect') + + print(f'\nWaiting for /visual_inspection/inspect service...') + if not cli.wait_for_service(timeout_sec=10.0): + print('[ERROR] Service not available') + sys.exit(1) + + req = Inspect.Request() + req.target_object = args.object + req.location_label = args.location + req.max_objects = args.max + req.return_home = not args.no_home + + obj_label = args.object or 'any' + print(f'\n{"="*55}') + print(f' Sending request: object="{obj_label}" loc="{args.location}"') + print(f'{"="*55}') + + future = cli.call_async(req) + rclpy.spin_until_future_complete(node, future) + + if future.result() is None: + print('[ERROR] Service call failed') + sys.exit(1) + + res = future.result() + print(f'\n--- RESPONSE ---') + print(f' success : {res.success}') + print(f' status : {res.status}') + print(f' objects_found : {res.objects_found}') + print(f' objects_inspected: {res.objects_inspected}') + print(f' object_in_back : {res.object_in_back}') + print(f' info : {res.info}') + print(f' images saved ({len(res.image_paths)}):') + for p in res.image_paths: + print(f' {p}') + + node.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/detect_cameras.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/detect_cameras.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/ibvs_pipeline.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/ibvs_pipeline.py index 9071fe26..1194f2c6 100644 --- a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/ibvs_pipeline.py +++ b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/ibvs_pipeline.py @@ -325,7 +325,7 @@ def compute_servo_correction(self, e_x, e_y, dt=0.033): d_tilt = self.kd_tilt * (error_tilt - self.prev_error_tilt) / dt self.prev_error_tilt = error_tilt - delta_tilt = -(p_tilt + i_tilt + d_tilt) + delta_tilt = (p_tilt + i_tilt + d_tilt) # +ve: tilt up when object above centre # Velocity limiting for tilt MAX_SPEED_TILT = 3.0 # balanced speed diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/quick_camera_test.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/tools/quick_camera_test.py old mode 100644 new mode 100755 diff --git a/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/visual_pipeline.py b/Visual_inspection/Visual_Inspection_system_Jetson_orin_nano/visual_pipeline.py old mode 100644 new mode 100755