-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathobject_detection.yaml
More file actions
148 lines (129 loc) · 8.02 KB
/
object_detection.yaml
File metadata and controls
148 lines (129 loc) · 8.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
%YAML 1.2
---
# ──────────────────────────────────────────────────────────────────────
# Object Detection Task (PANDORA Dataset)
# ──────────────────────────────────────────────────────────────────────
# CenterNet-style detector that predicts heatmaps + regression maps
# for Rectangular Bounding Fields of View (RBFoV).
# Supports both Spherical and Planar backbones via architecture default.
# NOTE(review): the "%YAML 1.2" directive above is rejected by PyYAML,
# which only supports 1.1 — confirm which loader reads this file.
# ──────────────────────────────────────────────────────────────────────
root_directory: ${oc.env:USF_PROJECT_DIRECTORY}/config/task/object_detection
# Hydra defaults list: composes the architecture and strategy config groups,
# then applies this file's own keys (_self_) on top.
defaults:
  - object_detection@architecture: spherical  # architecture variant: spherical | planar
  - strategy: DDP  # distributed training strategy: DDP | DeepSpeed | FSDP
  - _self_
# checkpoint: ${oc.env:USF_PROJECT_DIRECTORY}/data/object_detection/checkpoint/spherical-nrr-200.ckpt  # uncomment to resume
# ── Lightning Model ──────────────────────────────────────────────────
model:
  _target_: usf.network.model.object_detection.ObjectDetectionLightningModel
  optimizer_param:
    lr: 0.001  # AdamW learning rate
    weight_decay: 0.01  # AdamW weight decay
  lr_scheduler_param:
    warmup_steps_rate: 0.4  # fraction of total steps spent in linear warmup
    min_lr_rate: 0.01  # minimum LR as a ratio of initial LR
    num_cycles: 0.5  # cosine annealing half-cycles (0.5 = single decay to min)
  benchmark_train: false  # compute mAP every training step (slow, for debugging)
  benchmark_eval: false  # compute mAP + visualize predictions every validation step
  num_regression_points: 3  # number of nearest grid points to activate for each GT centroid in the regression target maps
  # ── Post-Processing: Extract Raw Detections ──────────────────────
  extract_raw_rbfovs:
    probability_threshold: 0.3  # minimum confidence in [0, 1) for a heatmap peak to become a detection
    pool_radius: 0.06705  # geodesic radius for local-max pooling (optional; defaults to 1.5 x avg nn distance)
  # ── Post-Processing: Non-Maximum Suppression ─────────────────────
  non_maximum_suppression:
    max_rbfov_per_category: 20  # keep at most this many detections per class
    sigma: 5.0  # Gaussian decay strength for Matrix NMS
    score_threshold: 0.3  # discard proposals below this updated confidence
    reduction: prod  # row-wise IoU reduction: "prod" (stronger crowd-breaking) | "min" (SOLOv2-like)
    compensate: false  # SOLOv2-style max-IoU compensation
    hard_iou_threshold: 0.2  # greedy Hard-NMS backstop IoU cutoff after Matrix NMS
    # written "1.0e-6" (not "1e-6"): YAML 1.1 parsers such as PyYAML resolve
    # a dot-less exponent form as a *string*, not a float
    epsilon_tie_break: 1.0e-6  # small perturbation to break exact IoU ties deterministically
    vector_average_area: 2.7e-5  # average pixel area (sr) on the candidate sphere for pairwise IoU estimation
  # ── Evaluation ───────────────────────────────────────────────────
  benchmark:
    iou_threshold: [0.1, 0.5, 0.75]  # mAP evaluated at these IoU thresholds
    num_vis: 8  # samples to visualize each validation epoch
    log_image_dpi: 100  # DPI for logged figures
  # ── Loss ─────────────────────────────────────────────────────────
  loss_param:
    focal_alpha: 2.0  # focal-loss exponent for positive (detected) examples
    focal_beta: 4.0  # focal-loss exponent for negative (background) examples
    lambda_size: 1.0  # weight for RBFoV size regression loss
    lambda_offset: 1.0  # weight for centroid offset regression loss
    lambda_angle: 1.0  # weight for rotation angle regression loss
    allow_periodic_rotation: false  # if true, does not penalize predicted angle = GT + k x pi
  architecture: ${task.architecture}  # resolved from the architecture entry in the defaults list
# ── Data Module ──────────────────────────────────────────────────────
data_module:
  _target_: usf.dataset.pandora.PandoraDataModule
  dataset_base_path: ${oc.env:USF_PROJECT_DIRECTORY}/data/PANDORA
  # augmentation probabilities in [0.0, 1.0]
  train_augmentation:
    chroma_jitter: 0.5  # random hue/saturation shift
    luma_jitter: 0.5  # random brightness/contrast shift
    gaussian_blur: 0.5  # Gaussian blur augmentation
    gray_scale: 0.05  # convert to grayscale
    horizontal_reflection: 0.5  # left-right reflection on the sphere
    vertical_reflection: 0.0  # top-bottom reflection
    erase: 0.0  # random erasing
    rotation: 0.0  # random SO(3) rotation
  val_augmentation:
    chroma_jitter: 0.0
    luma_jitter: 0.0
    gaussian_blur: 0.0
    gray_scale: 0.0
    horizontal_reflection: 0.5
    vertical_reflection: 0.0  # 2 flip + rotation <=> no flip + rotation
    erase: 0.0
    rotation: 0.0
  # Uncomment to reproject onto a pinhole/fisheye output grid:
  # train_output_vector:
  #   _target_: numpy.load
  #   file: ${oc.env:USF_PROJECT_DIRECTORY}/config/lens_normal_map/180_180_560_560.npy
  #   allow_pickle: true
  # train_output_vector_mask:
  #   _target_: numpy.load
  #   file: ${oc.env:USF_PROJECT_DIRECTORY}/config/lens_normal_map/180_180_560_560_mask.npy
  #   allow_pickle: true
  # val_output_vector:
  #   _target_: numpy.load
  #   file: ${oc.env:USF_PROJECT_DIRECTORY}/config/lens_normal_map/180_180_560_560.npy
  #   allow_pickle: true
  # val_output_vector_mask:
  #   _target_: numpy.load
  #   file: ${oc.env:USF_PROJECT_DIRECTORY}/config/lens_normal_map/180_180_560_560_mask.npy
  #   allow_pickle: true
  # NOTE(review): the seed is a free-text string ("Object Detection"), not an
  # int — presumably hashed by the data module; confirm against PandoraDataModule
  val_seed: Object Detection  # deterministic seed for validation ordering
  downsample_image_size: [960, 480]  # equirectangular output [width, height]; changing this affects model input resolution
  meta:
    input:
      mean: [0.52076054, 0.46685297, 0.41337782]  # per-channel mean on [0, 1] scale (computed offline)
      std: [0.23466274, 0.23835337, 0.25246194]  # per-channel std on [0, 1] scale
      range: [0.0, 255.0]  # raw pixel value range before normalization
  num_workers: 2
  batch_size: 8
# ── Trainer ──────────────────────────────────────────────────────────
trainer:
  _target_: pytorch_lightning.Trainer
  accelerator: gpu
  enable_model_summary: true
  precision: 32-true  # full fp32 (Lightning precision flag)
  devices: -1  # use all visible GPUs
  max_epochs: 200
  log_every_n_steps: 10
  num_sanity_val_steps: 0
  use_distributed_sampler: false  # sampler is presumably handled by the data module — confirm
  profiler: simple
  strategy: ${task.strategy}  # resolved from the strategy entry in the defaults list
  callbacks:
    - _target_: pytorch_lightning.callbacks.LearningRateMonitor
      logging_interval: epoch
  logger:
    - _target_: pytorch_lightning.loggers.WandbLogger
      entity: ${oc.env:WANDB_ENTITY,null}  # falls back to null when the env var is unset
      project: Object Detection
      name: ${task.architecture.model_type}
    # - _target_: pytorch_lightning.loggers.TensorBoardLogger
    #   name: ${task.architecture.model_type} Object Detection
    #   save_dir: ${oc.env:USF_PROJECT_DIRECTORY}/tensorboard_logs