diff --git a/CHANGELOG.md b/CHANGELOG.md index 79e62151..8e076b22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,25 @@ No changes to highlight. ## Other Changes: +No changes to highlight. + +# v1.4.1 + +## New Features: + +- Add yolov9 pretrained weights by `@illian01` in [PR 631](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/631) + +- Add EXIR exporting feature by `@illian01` in [PR 632](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/632) + +## Bug Fixes: + +No changes to highlight. + +## Breaking Changes: + +No changes to highlight. + +## Other Changes: +- Fix/add data params mlflow by `@hglee98` in [PR 629](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/629) # 1.4.0 diff --git a/Dockerfile b/Dockerfile index 8c37353f..66d5dd5f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8.16 +FROM python:3.10 ARG TORCH_VERSION="2.0.1" ARG TORCHVISION_VERSION="0.15.2" diff --git a/README.md b/README.md index aa8c7c8c..20160d20 100644 --- a/README.md +++ b/README.md @@ -33,8 +33,8 @@ _____ ### Prerequisites -- Python `3.8` | `3.9` | `3.10` -- PyTorch `2.0.1` (recommended) (compatible with: `1.11.x` - `2.0.1`) +- Python `>=3.10` +- PyTorch `>=2.0.1` ### Install with pypi diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/augmentation.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/augmentation.yaml new file mode 100644 index 00000000..145ada0b --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/augmentation.yaml @@ -0,0 +1,56 @@ +augmentation: + train: + - + name: mosaicdetection + size: [640, 640] + mosaic_prob: 1.0 + affine_scale: [0.1, 1.9] + degrees: 0.0 + translate: 0.1 + shear: 0.0 + enable_mixup: True + mixup_prob: 0.15 + mixup_scale: [0.1, 2.0] + fill: 0 + mosaic_off_duration: 15 + - + name: hsvjitter + h_mag: 5 + s_mag: 30 + v_mag: 30 + - + name: randomhorizontalflip + p: 0.5 + - + name: resize + size: 640 + interpolation: bilinear + 
max_size: ~ + resize_criteria: long + - + name: pad + size: 640 + fill: 0 + - + name: randomresize + base_size: [640, 640] + stride: 32 + random_range: 5 + interpolation: bilinear + - + name: totensor + pixel_range: 1.0 + inference: + - + name: resize + size: 640 + interpolation: bilinear + max_size: ~ + resize_criteria: long + - + name: pad + size: 640 + fill: 0 + - + name: totensor + pixel_range: 1.0 diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/data.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/data.yaml new file mode 100644 index 00000000..86801f45 --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/data.yaml @@ -0,0 +1,21 @@ +data: + name: coco2017 + task: detection + format: local # local, huggingface + path: + root: ./data/coco2017 # dataset root + train: + image: images/train # directory for training images + label: labels/train # directory for training labels + valid: + image: images/valid # directory for valid images + label: labels/valid # directory for valid labels + test: + image: ~ + label: ~ + pattern: + image: ~ + label: ~ + id_mapping: id_mapping.json + # id_mapping: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy 
bear', 'hair drier', 'toothbrush'] + pallete: ~ diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/environment.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/environment.yaml new file mode 100644 index 00000000..a5b5650d --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/environment.yaml @@ -0,0 +1,6 @@ +environment: + seed: 1 + num_workers: 4 + gpus: 0, 1, 2, 3, 4, 5, 6, 7 + batch_size: 16 # Batch size per gpu + cache_data: False \ No newline at end of file diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/logging.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/logging.yaml new file mode 100644 index 00000000..d352ee29 --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/logging.yaml @@ -0,0 +1,18 @@ +logging: + project_id: ~ + output_dir: ./outputs + tensorboard: true + mlflow: false + stdout: true + num_save_samples: 16 # num_save_samples should be >= 0 or None + model_save_options: + save_optimizer_state: true + save_best_only: false + best_model_criterion: loss # metric + sample_input_size: [640, 640] # Used for flops and onnx export + onnx_export_opset: 13 # Recommend in range [13, 17] + validation_epoch: &validation_epoch 10 + save_checkpoint_epoch: *validation_epoch # Multiplier of `validation_epoch`. 
+ metrics: + classwise_analysis: False + metric_names: ~ # None for default settings \ No newline at end of file diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/model.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/model.yaml new file mode 100644 index 00000000..0030cb91 --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/model.yaml @@ -0,0 +1,72 @@ +model: + task: detection + name: yolov9_tiny + checkpoint: + use_pretrained: false + load_head: false + path: ~ + optimizer_path: ~ + freeze_backbone: false + architecture: + full: ~ # auto + backbone: + name: gelan + params: + stem_out_channels: 16 + stem_kernel_size: 3 + stem_stride: 2 + return_stage_idx: [1, 2, 3] + act_type: &act_type silu + stage_params: + # Conv2D: ['conv', out_channels, kernel_size, stride] + # ELAN: ['elan', out_channels, part_channels, use_identity] + # RepNCSPELAN: ['repncspelan', out_channels, part_channels, use_identity, depth] + # AConv: ['aconv', out_channels] + # ADown: ['adown', out_channels] + - + - ['conv', 32, 3, 2] + - ['elan', 32, 32, false] + - + - ['aconv', 64] + - ['repncspelan', 64, 64, false, 3] + - + - ['aconv', 96] + - ['repncspelan', 96, 96, false, 3] + - + - ['aconv', 128] + - ['repncspelan', 128, 128, false, 3] + neck: + name: yolov9fpn + params: + repeat_num: 3 + act_type: *act_type + use_aux_loss: &use_aux_loss false + bu_type: aconv + spp_channels: 128 + n4_channels: 96 + p3_channels: 64 + p3_to_p4_channels: 48 + p4_channels: 96 + p4_to_p5_channels: 64 + p5_channels: 128 + head: + name: yolo_detection_head + params: + version: v9 + num_anchors: ~ + use_group: true + reg_max: ®_max 16 + act_type: *act_type + use_aux_loss: *use_aux_loss + postprocessor: + params: + # postprocessor - decode + reg_max: *reg_max + score_thresh: 0.01 + # postprocessor - nms + nms_thresh: 0.65 + class_agnostic: false + losses: + - criterion: yolov9_loss + weight: ~ + l1_activate_epoch: ~ diff --git 
a/config/benchmark_examples/detection-coco2017-yolov9_tiny/training.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/training.yaml new file mode 100644 index 00000000..2721976c --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/training.yaml @@ -0,0 +1,23 @@ +training: + epochs: 500 + mixed_precision: True + max_norm: ~ + ema: + name: exp_decay + decay: 0.9999 + beta: 2000 + optimizer: + name: sgd + lr: 0.01 + momentum: 0.937 + weight_decay: 0.0005 # No bias and norm decay + nesterov: True + no_bias_decay: True + no_norm_weight_decay: True + overwrite: ~ + scheduler: + name: cosine_no_sgdr + warmup_epochs: 3 + warmup_bias_lr: 0.001 + min_lr: 0.0001 + end_epoch: 485 diff --git a/config/model/mobilenetv4/mobilenetv4-conv-medium-classification.yaml b/config/model/mobilenetv4/mobilenetv4-conv-medium-classification.yaml index 5301ebe9..6a896c19 100644 --- a/config/model/mobilenetv4/mobilenetv4-conv-medium-classification.yaml +++ b/config/model/mobilenetv4/mobilenetv4-conv-medium-classification.yaml @@ -21,7 +21,7 @@ model: norm_type: batch_norm act_type: relu return_stage_idx: ~ - layer_scale: 0.1 + layer_scale: ~ stage_params: # Conv2D: ['conv', out_channels, kernel_size, stride] # FusedIB: ['fi', out_channels, hidden_channels, kernel_size, stride] diff --git a/config/model/mobilenetv4/mobilenetv4-conv-small-classification.yaml b/config/model/mobilenetv4/mobilenetv4-conv-small-classification.yaml index a71d02a3..5114fd33 100644 --- a/config/model/mobilenetv4/mobilenetv4-conv-small-classification.yaml +++ b/config/model/mobilenetv4/mobilenetv4-conv-small-classification.yaml @@ -21,7 +21,7 @@ model: norm_type: batch_norm act_type: relu return_stage_idx: ~ - layer_scale: 0.1 + layer_scale: ~ stage_params: # Conv2D: ['conv', out_channels, kernel_size, stride] # FusedIB: ['fi', out_channels, hidden_channels, kernel_size, stride] diff --git a/config/model/mobilenetv4/mobilenetv4-hybrid-large-classification.yaml 
b/config/model/mobilenetv4/mobilenetv4-hybrid-large-classification.yaml index c5736965..13d2a0aa 100644 --- a/config/model/mobilenetv4/mobilenetv4-hybrid-large-classification.yaml +++ b/config/model/mobilenetv4/mobilenetv4-hybrid-large-classification.yaml @@ -21,7 +21,7 @@ model: norm_type: batch_norm act_type: gelu return_stage_idx: ~ - layer_scale: ~ + layer_scale: 0.1 stage_params: # Conv2D: ['conv', out_channels, kernel_size, stride] # FusedIB: ['fi', out_channels, hidden_channels, kernel_size, stride] diff --git a/config/model/yolo/yolov9_c-detection.yaml b/config/model/yolov9/yolov9_c-detection.yaml similarity index 98% rename from config/model/yolo/yolov9_c-detection.yaml rename to config/model/yolov9/yolov9_c-detection.yaml index 56893e9f..50cfb0dd 100644 --- a/config/model/yolo/yolov9_c-detection.yaml +++ b/config/model/yolov9/yolov9_c-detection.yaml @@ -2,7 +2,7 @@ model: task: detection name: yolov9_c checkpoint: - use_pretrained: false + use_pretrained: true load_head: false path: ~ optimizer_path: ~ diff --git a/config/model/yolo/yolov9_m-detection.yaml b/config/model/yolov9/yolov9_m-detection.yaml similarity index 98% rename from config/model/yolo/yolov9_m-detection.yaml rename to config/model/yolov9/yolov9_m-detection.yaml index dee08ea8..d8fa0088 100644 --- a/config/model/yolo/yolov9_m-detection.yaml +++ b/config/model/yolov9/yolov9_m-detection.yaml @@ -2,7 +2,7 @@ model: task: detection name: yolov9_m checkpoint: - use_pretrained: false + use_pretrained: true load_head: false path: ~ optimizer_path: ~ diff --git a/config/model/yolo/yolov9_s-detection.yaml b/config/model/yolov9/yolov9_s-detection.yaml similarity index 98% rename from config/model/yolo/yolov9_s-detection.yaml rename to config/model/yolov9/yolov9_s-detection.yaml index b041c1c6..6beafd85 100644 --- a/config/model/yolo/yolov9_s-detection.yaml +++ b/config/model/yolov9/yolov9_s-detection.yaml @@ -2,7 +2,7 @@ model: task: detection name: yolov9_s checkpoint: - use_pretrained: false + 
use_pretrained: true load_head: false path: ~ optimizer_path: ~ diff --git a/config/model/yolov9/yolov9_tiny-detection.yaml b/config/model/yolov9/yolov9_tiny-detection.yaml new file mode 100644 index 00000000..65bbd170 --- /dev/null +++ b/config/model/yolov9/yolov9_tiny-detection.yaml @@ -0,0 +1,72 @@ +model: + task: detection + name: yolov9_tiny + checkpoint: + use_pretrained: true + load_head: false + path: ~ + optimizer_path: ~ + freeze_backbone: false + architecture: + full: ~ # auto + backbone: + name: gelan + params: + stem_out_channels: 16 + stem_kernel_size: 3 + stem_stride: 2 + return_stage_idx: [1, 2, 3] + act_type: &act_type silu + stage_params: + # Conv2D: ['conv', out_channels, kernel_size, stride] + # ELAN: ['elan', out_channels, part_channels, use_identity] + # RepNCSPELAN: ['repncspelan', out_channels, part_channels, use_identity, depth] + # AConv: ['aconv', out_channels] + # ADown: ['adown', out_channels] + - + - ['conv', 32, 3, 2] + - ['elan', 32, 32, false] + - + - ['aconv', 64] + - ['repncspelan', 64, 64, false, 3] + - + - ['aconv', 96] + - ['repncspelan', 96, 96, false, 3] + - + - ['aconv', 128] + - ['repncspelan', 128, 128, false, 3] + neck: + name: yolov9fpn + params: + repeat_num: 3 + act_type: *act_type + use_aux_loss: &use_aux_loss false + bu_type: aconv + spp_channels: 128 + n4_channels: 96 + p3_channels: 64 + p3_to_p4_channels: 48 + p4_channels: 96 + p4_to_p5_channels: 64 + p5_channels: 128 + head: + name: yolo_detection_head + params: + version: v9 + num_anchors: ~ + use_group: true + reg_max: ®_max 16 + act_type: *act_type + use_aux_loss: *use_aux_loss + postprocessor: + params: + # postprocessor - decode + reg_max: *reg_max + score_thresh: 0.01 + # postprocessor - nms + nms_thresh: 0.65 + class_agnostic: false + losses: + - criterion: yolov9_loss + weight: ~ + l1_activate_epoch: ~ diff --git a/docs/benchmarks/benchmarks.md b/docs/benchmarks/benchmarks.md index 0fec534b..e3f7e0f6 100644 --- a/docs/benchmarks/benchmarks.md +++ 
b/docs/benchmarks/benchmarks.md @@ -40,6 +40,10 @@ If you have a better recipe, please share with us anytime. We appreciate all eff |---|---|---|---|---|---|---|---|---|---|---| | COCO-val | [RT-DETR_res18*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/rtdetr/rtdetr-res18-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res18_coco.safetensors?versionId=uu9v49NI6rQx8wOY6bJbEXUFOG_R9xqH) | (640, 640) | 65.77 | 52.75 | 48.49 | 20.18M | 40.36G | Supported | No input z-norm, [lyuwenyu/RT-DETR](https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch) | | COCO-val | [RT-DETR_res50*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/rtdetr/rtdetr-res50-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res50_coco.safetensors?versionId=JHmnjY13BEflpnDCYPFJ1c17UwpqDrLQ) | (640, 640) | 72.64 | 59.50 | 54.73 | 42.94M | 138.36G | Supported | No input z-norm, [lyuwenyu/RT-DETR](https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch) | +| COCO-val | [yolov9-tiny](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolov9/yolov9_tiny-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_tiny_coco.safetensors) | (640, 640) | 50.03 | 38.63 | 36.02 | 2.44M | 9.99G | Supported | No input z-norm | +| COCO-val | [yolov9-s*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolov9/yolov9_s-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_s_coco.safetensors) | (640, 640) | 62.63 | 51.13 | 47.13 | 7.23M | 26.87G | Supported | No input z-norm, [YOLO](https://yolo-docs.readthedocs.io/en/latest/2_model_zoo/0_object_detection.html) | +| COCO-val | 
[yolov9-m*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolov9/yolov9_m-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_m_coco.safetensors) | (640, 640) | 67.43 | 56.13 | 51.72 | 20.12M | 77.08G | Supported | No input z-norm, [YOLO](https://yolo-docs.readthedocs.io/en/latest/2_model_zoo/0_object_detection.html) | +| COCO-val | [yolov9-c*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolov9/yolov9_c-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_c_coco.safetensors) | (640, 640) | 69.16 | 57.90 | 53.28 | 25.50M | 103.17G | Supported | No input z-norm, [YOLO](https://yolo-docs.readthedocs.io/en/latest/2_model_zoo/0_object_detection.html) | | COCO-val | [YOLOX-nano*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolox/yolox-nano-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolox/yolox_nano_coco.safetensors?versionId=JCXugDTwGegx9Kl6Jc5AMJpIkA.WlNVP) | (416, 416) | 41.30 | 27.90 | 26.33 | 0.91M | 1.08G | Supported | [Megvii-BaseDetection/YOLOX](https://github.com/Megvii-BaseDetection/YOLOX?tab=readme-ov-file#benchmark), conf_thresh=0.01, nms_thresh=0.65 | | COCO-val | [YOLOX-tiny*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolox/yolox-tiny-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolox/yolox_tiny_coco.safetensors?versionId=lJp1bCEToD_6IaL9kRCqcYIwVZ.QQ.1P) | (416, 416) | 50.69 | 36.18 | 34.00 | 5.06M | 6.45G | Supported | [Megvii-BaseDetection/YOLOX](https://github.com/Megvii-BaseDetection/YOLOX?tab=readme-ov-file#benchmark), conf_thresh=0.01, nms_thresh=0.65 | | COCO-val | 
[YOLOX-s](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolox/yolox-s-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolox/yolox_s_coco.safetensors?versionId=QRLqHKqhv8TSYBrmsQ3M8lCR8w7HEZyA) | (640, 640) | 58.56 | 44.10 | 40.63 | 8.97M | 26.81G | Supported | conf_thresh=0.01, nms_thresh=0.65 | diff --git a/docs/getting_started/installation/installation.md b/docs/getting_started/installation/installation.md index b8c42fdf..e0b88d05 100644 --- a/docs/getting_started/installation/installation.md +++ b/docs/getting_started/installation/installation.md @@ -2,8 +2,8 @@ ### Prerequisites -- Python `3.8` | `3.9` | `3.10` -- PyTorch `2.0.1` (recommended) (compatible with: `1.11.x` - `2.0.1`) +- Python `>=3.10` +- PyTorch `>=2.0.1` ### Install with pypi diff --git a/requirements.txt b/requirements.txt index 5ffe68ff..e00673bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -torch>=1.11.0,<=2.0.1 -torchvision>=0.12.0,<=0.15.2 +torch>=2.0.1 +torchvision onnx onnxruntime numpy diff --git a/src/netspresso_trainer/VERSION b/src/netspresso_trainer/VERSION index e21e727f..13175fdc 100644 --- a/src/netspresso_trainer/VERSION +++ b/src/netspresso_trainer/VERSION @@ -1 +1 @@ -1.4.0 \ No newline at end of file +1.4.1 \ No newline at end of file diff --git a/src/netspresso_trainer/loggers/stdout.py b/src/netspresso_trainer/loggers/stdout.py index 8586573a..c810f3bc 100644 --- a/src/netspresso_trainer/loggers/stdout.py +++ b/src/netspresso_trainer/loggers/stdout.py @@ -65,12 +65,20 @@ def __call__( else: rows += [class_info.split('_', 1) for class_info in list(metrics[headers[-1]]['classwise'].keys())] rows += [['-', 'All', ]] if not data_stats else [['-', 'All', data_stats['total_instances']]] + all_row_idx = len(rows) - 1 + + has_weighted = any('weighted_mean' in v for v in metrics.values()) + if has_weighted: + rows += [['-', 'All (weighted)', 
data_stats['total_instances']]] if data_stats else [['-', 'All (weighted)']] + weighted_row_idx = len(rows) - 1 for _metric_name, score_dict in metrics.items(): if 'classwise' in score_dict: # If classwise analysis is activated for cls_num, item in enumerate(score_dict['classwise']): rows[cls_num].append(score_dict['classwise'][item]) - rows[-1].append(score_dict['mean']) + rows[all_row_idx].append(score_dict['mean']) + if has_weighted: + rows[weighted_row_idx].append(score_dict.get('weighted_mean', score_dict['mean'])) metric_std_log += tabulate(rows, headers=headers, tablefmt='grid', numalign='left', stralign='left') logger.info(metric_std_log) # tabulaate is already contained as pandas dependency diff --git a/src/netspresso_trainer/metrics/base.py b/src/netspresso_trainer/metrics/base.py index 2dc3e37b..39527487 100644 --- a/src/netspresso_trainer/metrics/base.py +++ b/src/netspresso_trainer/metrics/base.py @@ -29,6 +29,7 @@ def __init__(self, metric_name, num_classes, classwise_analysis, **kwargs): if self.classwise_analysis: self.classwise_metric_meters = [MetricMeter(f'{metric_name}_{i}', ':6.2f') for i in range(num_classes)] self.metric_meter = MetricMeter(metric_name, ':6.2f') + self.weighted_metric_meter = None # Optionally set by subclasses def calibrate(self, pred, target, **kwargs): raise NotImplementedError @@ -43,7 +44,13 @@ def __init__(self, task, metrics, metric_adaptor, classwise_analysis) -> None: def reset_values(self): for phase in self.metrics: - [metric.metric_meter.reset() for metric in self.metrics[phase]] + for metric in self.metrics[phase]: + metric.metric_meter.reset() + if metric.weighted_metric_meter is not None: + metric.weighted_metric_meter.reset() + if metric.classwise_analysis: + for meter in metric.classwise_metric_meters: + meter.reset() def update(self, pred: torch.Tensor, target: torch.Tensor, phase: str, **kwargs: Any) -> None: if len(pred) == 0: # Removed dummy batch has 0 len @@ -55,13 +62,16 @@ def update(self, pred: 
torch.Tensor, target: torch.Tensor, phase: str, **kwargs: def result(self, phase='train'): ret = {metric.metric_name: {} for metric in self.metrics[phase]} # Initialize with empty dict - if phase == 'valid' and self.classwise_analysis: # Add classwise results only for valid phase + if self.classwise_analysis: # Add classwise results for any phase when enabled for metric in self.metrics[phase]: - classwise_result_dict = {i:classwise_meter.avg for i, classwise_meter in enumerate(metric.classwise_metric_meters)} - ret[metric.metric_name] = {'classwise': classwise_result_dict} + if metric.classwise_analysis: + classwise_result_dict = {i:classwise_meter.avg for i, classwise_meter in enumerate(metric.classwise_metric_meters)} + ret[metric.metric_name]['classwise'] = classwise_result_dict for metric in self.metrics[phase]: ret[metric.metric_name]['mean'] = metric.metric_meter.avg # Add mean score + if metric.weighted_metric_meter is not None: + ret[metric.metric_name]['weighted_mean'] = metric.weighted_metric_meter.avg return ret diff --git a/src/netspresso_trainer/metrics/builder.py b/src/netspresso_trainer/metrics/builder.py index f7e81e3e..535f41c8 100644 --- a/src/netspresso_trainer/metrics/builder.py +++ b/src/netspresso_trainer/metrics/builder.py @@ -36,10 +36,7 @@ def build_metrics(task: str, model_conf, metrics_conf, num_classes, **kwargs) -> metrics = {} for phase in PHASE_LIST: - if phase == 'valid': # classwise_analysis is only available in valid phase - metrics[phase] = [METRIC_LIST[name](num_classes=num_classes, classwise_analysis=classwise_analysis, **kwargs) for name in metric_names] - else: - metrics[phase] = [METRIC_LIST[name](num_classes=num_classes, classwise_analysis=False, **kwargs) for name in metric_names] + metrics[phase] = [METRIC_LIST[name](num_classes=num_classes, classwise_analysis=classwise_analysis, **kwargs) for name in metric_names] metric_adaptor = METRIC_ADAPTORS[task](metric_names) diff --git 
a/src/netspresso_trainer/metrics/detection/metric.py b/src/netspresso_trainer/metrics/detection/metric.py index 296d96d8..5f6ad265 100644 --- a/src/netspresso_trainer/metrics/detection/metric.py +++ b/src/netspresso_trainer/metrics/detection/metric.py @@ -26,6 +26,7 @@ import numpy as np from ..base import BaseMetric +from ...utils.record import MetricMeter def box_iou_batch(boxes_true: np.ndarray, boxes_detection: np.ndarray) -> np.ndarray: @@ -279,16 +280,29 @@ def __call__(self, predictions: List[dict], targets: List[dict]): true_objs = np.concatenate((true_objs_bbox, true_objs_class[..., np.newaxis]), axis=-1) predicted_objs = np.concatenate((predicted_objs_bbox, predicted_objs_class[..., np.newaxis], predicted_objs_confidence[..., np.newaxis]), axis=-1) - if predicted_objs.shape[0] == 0 and true_objs.shape[0]: + if predicted_objs.shape[0] == 0 and true_objs.shape[0] == 0: + pass # Nothing to record + elif predicted_objs.shape[0] == 0: + # GT exists but no predictions: all GT are FN stats.append( ( np.zeros((0, iou_thresholds.size), dtype=bool), - *np.zeros((2, 0)), + np.zeros(0), + np.zeros(0), true_objs[:, 4], ) ) - - if true_objs.shape[0]: + elif true_objs.shape[0] == 0: + # Predictions exist but no GT: all predictions are FP + stats.append( + ( + np.zeros((predicted_objs.shape[0], iou_thresholds.size), dtype=bool), + predicted_objs[:, 5], + predicted_objs[:, 4], + np.zeros(0), + ) + ) + else: matches = match_detection_batch(predicted_objs, true_objs, iou_thresholds) stats.append( ( @@ -306,6 +320,7 @@ class mAP50(BaseMetric): def __init__(self, num_classes, classwise_analysis, **kwargs): metric_name = 'mAP50' # Name for logging super().__init__(metric_name=metric_name, num_classes=num_classes, classwise_analysis=classwise_analysis) + self.weighted_metric_meter = MetricMeter(f'{metric_name}_weighted', ':6.2f') def calibrate(self, predictions, targets, **kwargs): stats = kwargs['stats'] # Get from DetectionMetricAdapter @@ -319,8 +334,17 @@ def 
calibrate(self, predictions, targets, **kwargs): for i, classwise_meter in enumerate(self.classwise_metric_meters): classwise_meter.update(average_precisions[i, 0]) self.metric_meter.update(np.nanmean(average_precisions[:, 0])) + + # Weighted mean by GT instance count + true_class_ids = concatenated_stats[3] + unique_classes, class_counts = np.unique(true_class_ids[true_class_ids >= 0], return_counts=True) + ap_at_iou = average_precisions[unique_classes.astype(int), 0] + valid = ~np.isnan(ap_at_iou) + weighted = float(np.sum(ap_at_iou[valid] * class_counts[valid]) / np.sum(class_counts[valid])) if valid.sum() > 0 else 0.0 + self.weighted_metric_meter.update(weighted) else: self.metric_meter.update(0) + self.weighted_metric_meter.update(0) class mAP75(BaseMetric): @@ -328,6 +352,7 @@ def __init__(self, num_classes, classwise_analysis, **kwargs): # TODO: Select metrics by user metric_name = 'mAP75' super().__init__(metric_name=metric_name, num_classes=num_classes, classwise_analysis=classwise_analysis) + self.weighted_metric_meter = MetricMeter(f'{metric_name}_weighted', ':6.2f') def calibrate(self, predictions, targets, **kwargs): stats = kwargs['stats'] # Get from DetectionMetricAdapter @@ -341,8 +366,17 @@ def calibrate(self, predictions, targets, **kwargs): for i, classwise_meter in enumerate(self.classwise_metric_meters): classwise_meter.update(average_precisions[i, 5]) self.metric_meter.update(np.nanmean(average_precisions[:, 5])) + + # Weighted mean by GT instance count + true_class_ids = concatenated_stats[3] + unique_classes, class_counts = np.unique(true_class_ids[true_class_ids >= 0], return_counts=True) + ap_at_iou = average_precisions[unique_classes.astype(int), 5] + valid = ~np.isnan(ap_at_iou) + weighted = float(np.sum(ap_at_iou[valid] * class_counts[valid]) / np.sum(class_counts[valid])) if valid.sum() > 0 else 0.0 + self.weighted_metric_meter.update(weighted) else: self.metric_meter.update(0) + self.weighted_metric_meter.update(0) class 
mAP50_95(BaseMetric): @@ -350,6 +384,7 @@ def __init__(self, num_classes, classwise_analysis, **kwargs): # TODO: Select metrics by user metric_name = 'mAP50_95' super().__init__(metric_name=metric_name, num_classes=num_classes, classwise_analysis=classwise_analysis) + self.weighted_metric_meter = MetricMeter(f'{metric_name}_weighted', ':6.2f') def calibrate(self, predictions, targets, **kwargs): stats = kwargs['stats'] # Get from DetectionMetricAdapter @@ -363,14 +398,24 @@ def calibrate(self, predictions, targets, **kwargs): for i, classwise_meter in enumerate(self.classwise_metric_meters): classwise_meter.update(np.nanmean(average_precisions[i, :])) self.metric_meter.update(np.nanmean(average_precisions)) + + # Weighted mean by GT instance count (averaged over IoU thresholds) + true_class_ids = concatenated_stats[3] + unique_classes, class_counts = np.unique(true_class_ids[true_class_ids >= 0], return_counts=True) + ap_mean_per_class = np.nanmean(average_precisions[unique_classes.astype(int), :], axis=1) + valid = ~np.isnan(ap_mean_per_class) + weighted = float(np.sum(ap_mean_per_class[valid] * class_counts[valid]) / np.sum(class_counts[valid])) if valid.sum() > 0 else 0.0 + self.weighted_metric_meter.update(weighted) else: self.metric_meter.update(0) + self.weighted_metric_meter.update(0) class Precision50(BaseMetric): def __init__(self, num_classes, classwise_analysis, **kwargs): metric_name = 'Precision50' super().__init__(metric_name=metric_name, num_classes=num_classes, classwise_analysis=classwise_analysis) + self.weighted_metric_meter = MetricMeter(f'{metric_name}_weighted', ':6.2f') def calibrate(self, predictions, targets, **kwargs): stats = kwargs['stats'] @@ -381,27 +426,46 @@ def calibrate(self, predictions, targets, **kwargs): if self.classwise_analysis: for i, classwise_meter in enumerate(self.classwise_metric_meters): - classwise_meter.update(np.nanmean(precisions[i, :])) - self.metric_meter.update(np.nanmean(precisions)) + 
classwise_meter.update(precisions[i, 0]) # IoU=0.5 only (index 0) + self.metric_meter.update(np.nanmean(precisions[:, 0])) # IoU=0.5 only (index 0) + + # Weighted mean by GT instance count at IoU=0.5 + true_class_ids = concatenated_stats[3] + unique_classes, class_counts = np.unique(true_class_ids[true_class_ids >= 0], return_counts=True) + p_at_iou = precisions[unique_classes.astype(int), 0] + valid = ~np.isnan(p_at_iou) + weighted = float(np.sum(p_at_iou[valid] * class_counts[valid]) / np.sum(class_counts[valid])) if valid.sum() > 0 else 0.0 + self.weighted_metric_meter.update(weighted) else: self.metric_meter.update(0) + self.weighted_metric_meter.update(0) class Recall50(BaseMetric): def __init__(self, num_classes, classwise_analysis, **kwargs): metric_name = 'Recall50' super().__init__(metric_name=metric_name, num_classes=num_classes, classwise_analysis=classwise_analysis) + self.weighted_metric_meter = MetricMeter(f'{metric_name}_weighted', ':6.2f') def calibrate(self, predictions, targets, **kwargs): stats = kwargs['stats'] if stats: concatenated_stats = [np.concatenate(items, 0) for items in zip(*stats)] - recalls = recall_per_class(*concatenated_stats, num_classes=self.num_classes)[:, 0:1] + recalls = recall_per_class(*concatenated_stats, num_classes=self.num_classes)[:, 0:1] # IoU=0.5 only if self.classwise_analysis: for i, classwise_meter in enumerate(self.classwise_metric_meters): classwise_meter.update(np.nanmean(recalls[i, :])) self.metric_meter.update(np.nanmean(recalls)) + + # Weighted mean by GT instance count at IoU=0.5 + true_class_ids = concatenated_stats[3] + unique_classes, class_counts = np.unique(true_class_ids[true_class_ids >= 0], return_counts=True) + r_at_iou = recalls[unique_classes.astype(int), 0] + valid = ~np.isnan(r_at_iou) + weighted = float(np.sum(r_at_iou[valid] * class_counts[valid]) / np.sum(class_counts[valid])) if valid.sum() > 0 else 0.0 + self.weighted_metric_meter.update(weighted) else: self.metric_meter.update(0) + 
self.weighted_metric_meter.update(0) diff --git a/src/netspresso_trainer/models/utils.py b/src/netspresso_trainer/models/utils.py index df54087a..225ae43a 100644 --- a/src/netspresso_trainer/models/utils.py +++ b/src/netspresso_trainer/models/utils.py @@ -56,6 +56,10 @@ 'yolox_m': 'coco', 'yolox_l': 'coco', 'yolox_x': 'coco', + 'yolov9_tiny': 'coco', + 'yolov9_s': 'coco', + 'yolov9_c': 'coco', + 'yolov9_m': 'coco', 'rtdetr_res18': 'coco', 'rtdetr_res50': 'coco', 'yolo_fastest_v2': 'coco', @@ -134,11 +138,23 @@ 'yolox_x': { 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolox/yolox_x_coco.safetensors?versionId=NWskUEbSGviBWskHQ3P1dQZXnRXOR1WN", }, + 'yolov9_tiny': { + "coco": "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_tiny_coco.safetensors?versionId=lFU6CTU6CayTyETvHr4o8k_Sh26vRH2F", + }, + "yolov9_s": { + "coco": "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_s_coco.safetensors?versionId=EpMf6UaAZC0qwIRmQVR_mqeRObskt2PK", + }, + "yolov9_m": { + "coco": "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_m_coco.safetensors?versionId=jjmbLq_06YW1VyUPexSvl6KDOBQJpFd5" + }, + "yolov9_c": { + "coco": "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_c_coco.safetensors?versionId=TDZWEU8pi_c0ZHPS_U073BoqXaUFCviN", + }, 'rtdetr_res18': { - 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res18_coco.safetensors?versionId=uu9v49NI6rQx8wOY6bJbEXUFOG_R9xqH", + 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res18_coco.safetensors?versionId=9uegrNukkbp5ySO4vC52WPFhUEbEpEbD", }, 'rtdetr_res50': { - 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res50_coco.safetensors?versionId=JHmnjY13BEflpnDCYPFJ1c17UwpqDrLQ", + 
'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res50_coco.safetensors?versionId=ZwwBP5C9CE2oRoBJy5Gjr7aTFMAb2hdz", }, 'yolo_fastest_v2': { 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolofastest/yolo_fastest_v2_coco.safetensors?versionId=CGhNjiZygGVjtHm0M586DzQ6.2FqWvl1" diff --git a/src/netspresso_trainer/pipelines/evaluation.py b/src/netspresso_trainer/pipelines/evaluation.py index cb452675..3cc8381d 100644 --- a/src/netspresso_trainer/pipelines/evaluation.py +++ b/src/netspresso_trainer/pipelines/evaluation.py @@ -102,7 +102,8 @@ def log_end_evaluation( if 'classwise' in metrics[list(metrics.keys())[0]]: tmp_metrics = {} for metric_name, metric in metrics.items(): - tmp_metrics[metric_name] = {'mean': metric['mean'], 'classwise': {}} + tmp_metrics[metric_name] = {k: v for k, v in metric.items() if k != 'classwise'} + tmp_metrics[metric_name]['classwise'] = {} for cls_num, score in metric['classwise'].items(): cls_name = self.logger.class_map[cls_num] if cls_num in self.logger.class_map else 'mean' tmp_metrics[metric_name]['classwise'][f'{cls_num}_{cls_name}'] = score diff --git a/src/netspresso_trainer/pipelines/train.py b/src/netspresso_trainer/pipelines/train.py index cb42080e..e2be4cb0 100644 --- a/src/netspresso_trainer/pipelines/train.py +++ b/src/netspresso_trainer/pipelines/train.py @@ -35,6 +35,7 @@ from ..losses.builder import LossFactory from ..metrics.builder import MetricFactory from ..utils.checkpoint import load_checkpoint, save_checkpoint +from ..utils.exir import save_exir from ..utils.fx import save_graphmodule from ..utils.logger import yaml_for_logging from ..utils.model_ema import ModelEMA @@ -245,6 +246,19 @@ def validate(self): self.task_processor.get_metric_with_all_outputs(outputs, phase='valid', metric_factory=self.metric_factory) return outputs + def _convert_classwise_to_names(self, metrics): + first_metric = metrics[list(metrics.keys())[0]] 
+ if 'classwise' not in first_metric: + return metrics + tmp_metrics = {} + for metric_name, metric in metrics.items(): + tmp_metrics[metric_name] = {k: v for k, v in metric.items() if k != 'classwise'} + tmp_metrics[metric_name]['classwise'] = {} + for cls_num, score in metric['classwise'].items(): + cls_name = self.logger.class_map[cls_num] if cls_num in self.logger.class_map else 'mean' + tmp_metrics[metric_name]['classwise'][f'{cls_num}_{cls_name}'] = score + return tmp_metrics + def log_end_epoch( self, epoch: int, @@ -254,6 +268,11 @@ def log_end_epoch( ): train_losses = self.loss_factory.result('train') train_metrics = self.metric_factory.result('train') + + # TODO: Move to logger + # If class-wise metrics, convert to class names + train_metrics = self._convert_classwise_to_names(train_metrics) + self.log_results(prefix='training', epoch=epoch, losses=train_losses, metrics=train_metrics, data_stats=self.train_data_stats, learning_rate=self.learning_rate, elapsed_time=time_for_epoch) @@ -263,14 +282,7 @@ def log_end_epoch( # TODO: Move to logger # If class-wise metrics, convert to class names - if 'classwise' in valid_metrics[list(valid_metrics.keys())[0]]: - tmp_metrics = {} - for metric_name, metric in valid_metrics.items(): - tmp_metrics[metric_name] = {'mean': metric['mean'], 'classwise': {}} - for cls_num, score in metric['classwise'].items(): - cls_name = self.logger.class_map[cls_num] if cls_num in self.logger.class_map else 'mean' - tmp_metrics[metric_name]['classwise'][f'{cls_num}_{cls_name}'] = score - valid_metrics = tmp_metrics + valid_metrics = self._convert_classwise_to_names(valid_metrics) self.log_results(prefix='validation', epoch=epoch, samples=valid_samples, losses=valid_losses, metrics=valid_metrics, data_stats=self.eval_data_stats) @@ -355,6 +367,12 @@ def save_best(self): sample_input=self.sample_input.type(save_dtype), opset_version=opset_version) logger.info(f"ONNX model converting and saved at 
{str(model_save_path.with_suffix('.onnx'))}") + + save_exir(best_model, + model_save_path.with_suffix('.exir'), + sample_input=self.sample_input.type(save_dtype)) + logger.info(f"EXIR model converting and saved at {str(model_save_path.with_suffix('.exir'))}") + if self.logger.use_mlflow: self.logger.mlflow_logger.log_onnx_model(model_save_path.with_suffix('.onnx'), input_example=self.sample_input.type(save_dtype)) diff --git a/src/netspresso_trainer/utils/exir.py b/src/netspresso_trainer/utils/exir.py new file mode 100644 index 00000000..4dda99a1 --- /dev/null +++ b/src/netspresso_trainer/utils/exir.py @@ -0,0 +1,36 @@ +# Copyright (C) 2024 Nota Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ---------------------------------------------------------------------------- +from pathlib import Path +from typing import Union + +import torch +import torch.nn as nn +from loguru import logger +from torch import Tensor + +from .environment import get_device + +__all__ = ['save_exir'] + + +def save_exir(model: nn.Module, f: Union[str, Path], sample_input: Tensor): + if not hasattr(torch, 'export'): + logger.warning("Current torch version does not support torch.export. 
Please upgrade torch.") + return + sample_input = sample_input.to(get_device(model)) + exported_program = torch.export.export(model, (sample_input, )) + torch.export.save(exported_program, f) + return exported_program diff --git a/src/netspresso_trainer/utils/logger.py b/src/netspresso_trainer/utils/logger.py index d167b122..d3c7412b 100644 --- a/src/netspresso_trainer/utils/logger.py +++ b/src/netspresso_trainer/utils/logger.py @@ -40,7 +40,7 @@ def rank_filter(record): try: return dist.get_rank() == 0 - except RuntimeError: # Default process group has not been initialized, please make sure to call init_process_group. + except (RuntimeError, ValueError): # Default process group has not been initialized, please make sure to call init_process_group. return True def get_format(level: str, distributed: bool = False): diff --git a/tools/exir_convert.py b/tools/exir_convert.py new file mode 100644 index 00000000..ffaee37d --- /dev/null +++ b/tools/exir_convert.py @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Nota Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# ---------------------------------------------------------------------------- + +import argparse +import os +from itertools import chain +from pathlib import Path +from typing import List + +import torch +import torch.nn as nn +from netspresso_trainer.models import build_model, is_single_task_model +from netspresso_trainer.utils.exir import save_exir +from omegaconf import OmegaConf + + + +TEMP_NUM_CLASSES = 80 + + +def parse_args(): + parser = argparse.ArgumentParser(description="Parser for NetsPresso Export conversion") + + parser.add_argument( + '-c', '--config-path', type=str, default="config/model/yolox/yolox-s-detection.yaml", + help="Model config path") + parser.add_argument( + '-n', '--num-classes', type=int, default=TEMP_NUM_CLASSES, + help="Number of classes") + parser.add_argument( + '-o', '--output-dir', type=str, default="exir/", + help="Export model output directory") + parser.add_argument( + '--sample-size', type=int, nargs=2, default=(640, 640), + help="Input sample size") + parser.add_argument( + '--debug', action='store_true', help="Debug mode to check with the error message") + + args, _ = parser.parse_known_args() + return args + + +def get_model_config_path_list(config_path_or_dir: Path) -> List[Path]: + if config_path_or_dir.is_dir(): + config_dir = config_path_or_dir + return sorted(chain(config_dir.glob("*.yaml"), config_dir.glob("*.yml"))) + config_path = config_path_or_dir + return [config_path] + + +if __name__ == '__main__': + args = parse_args() + + config_path_list = get_model_config_path_list(Path(args.config_path)) + os.makedirs(args.output_dir, exist_ok=True) + + for model_config_path in config_path_list: + try: + print(f"Export conversion for ({model_config_path})..... 
", end='', flush=True) + config = OmegaConf.load(model_config_path) + config = config.model + config.single_task_model = is_single_task_model(config) + torch_model: nn.Module = build_model(config, num_classes=args.num_classes, devices=torch.device("cpu"), distributed=False) + torch_model.eval() + sample_input = torch.randn(1, 3, *args.sample_size) + save_exir(torch_model, + f=Path(args.output_dir) / f"{model_config_path.stem}.exir", + sample_input=sample_input) + print("Success!") + except KeyboardInterrupt: + print("") + break + except Exception as e: + print("Failed!") + if args.debug: + raise e + print(e)