diff --git a/CHANGELOG.md b/CHANGELOG.md index 79e62151..8e076b22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,25 @@ No changes to highlight. ## Other Changes: +No changes to highlight. + +# v1.4.1 + +## New Features: + +- Add yolov9 pretrained weights by `@illian01` in [PR 631](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/631) +- Add EXIR exporting feature by `@illian01` in [PR 632](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/632) + +## Bug Fixes: + +No changes to highlight. + +## Breaking Changes: + +No changes to highlight. + +## Other Changes: + Fix/add data params mlflow by `@hglee98` in [PR 629](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/629) # 1.4.0 diff --git a/Dockerfile b/Dockerfile index 8c37353f..66d5dd5f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8.16 +FROM python:3.10 ARG TORCH_VERSION="2.0.1" ARG TORCHVISION_VERSION="0.15.2" diff --git a/README.md b/README.md index aa8c7c8c..20160d20 100644 --- a/README.md +++ b/README.md @@ -33,8 +33,8 @@ _____ ### Prerequisites -- Python `3.8` | `3.9` | `3.10` -- PyTorch `2.0.1` (recommended) (compatible with: `1.11.x` - `2.0.1`) +- Python `>=3.10` +- PyTorch `>=2.0.1` ### Install with pypi diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/augmentation.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/augmentation.yaml new file mode 100644 index 00000000..145ada0b --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/augmentation.yaml @@ -0,0 +1,56 @@ +augmentation: + train: + - + name: mosaicdetection + size: [640, 640] + mosaic_prob: 1.0 + affine_scale: [0.1, 1.9] + degrees: 0.0 + translate: 0.1 + shear: 0.0 + enable_mixup: True + mixup_prob: 0.15 + mixup_scale: [0.1, 2.0] + fill: 0 + mosaic_off_duration: 15 + - + name: hsvjitter + h_mag: 5 + s_mag: 30 + v_mag: 30 + - + name: randomhorizontalflip + p: 0.5 + - + name: resize + size: 640 + interpolation: bilinear + 
max_size: ~ + resize_criteria: long + - + name: pad + size: 640 + fill: 0 + - + name: randomresize + base_size: [640, 640] + stride: 32 + random_range: 5 + interpolation: bilinear + - + name: totensor + pixel_range: 1.0 + inference: + - + name: resize + size: 640 + interpolation: bilinear + max_size: ~ + resize_criteria: long + - + name: pad + size: 640 + fill: 0 + - + name: totensor + pixel_range: 1.0 diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/data.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/data.yaml new file mode 100644 index 00000000..86801f45 --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/data.yaml @@ -0,0 +1,21 @@ +data: + name: coco2017 + task: detection + format: local # local, huggingface + path: + root: ./data/coco2017 # dataset root + train: + image: images/train # directory for training images + label: labels/train # directory for training labels + valid: + image: images/valid # directory for valid images + label: labels/valid # directory for valid labels + test: + image: ~ + label: ~ + pattern: + image: ~ + label: ~ + id_mapping: id_mapping.json + # id_mapping: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy 
bear', 'hair drier', 'toothbrush'] + pallete: ~ diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/environment.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/environment.yaml new file mode 100644 index 00000000..a5b5650d --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/environment.yaml @@ -0,0 +1,6 @@ +environment: + seed: 1 + num_workers: 4 + gpus: 0, 1, 2, 3, 4, 5, 6, 7 + batch_size: 16 # Batch size per gpu + cache_data: False \ No newline at end of file diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/logging.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/logging.yaml new file mode 100644 index 00000000..d352ee29 --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/logging.yaml @@ -0,0 +1,18 @@ +logging: + project_id: ~ + output_dir: ./outputs + tensorboard: true + mlflow: false + stdout: true + num_save_samples: 16 # num_save_samples should be >= 0 or None + model_save_options: + save_optimizer_state: true + save_best_only: false + best_model_criterion: loss # metric + sample_input_size: [640, 640] # Used for flops and onnx export + onnx_export_opset: 13 # Recommend in range [13, 17] + validation_epoch: &validation_epoch 10 + save_checkpoint_epoch: *validation_epoch # Multiplier of `validation_epoch`. 
+ metrics: + classwise_analysis: False + metric_names: ~ # None for default settings \ No newline at end of file diff --git a/config/benchmark_examples/detection-coco2017-yolov9_tiny/model.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/model.yaml new file mode 100644 index 00000000..0030cb91 --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/model.yaml @@ -0,0 +1,72 @@ +model: + task: detection + name: yolov9_tiny + checkpoint: + use_pretrained: false + load_head: false + path: ~ + optimizer_path: ~ + freeze_backbone: false + architecture: + full: ~ # auto + backbone: + name: gelan + params: + stem_out_channels: 16 + stem_kernel_size: 3 + stem_stride: 2 + return_stage_idx: [1, 2, 3] + act_type: &act_type silu + stage_params: + # Conv2D: ['conv', out_channels, kernel_size, stride] + # ELAN: ['elan', out_channels, part_channels, use_identity] + # RepNCSPELAN: ['repncspelan', out_channels, part_channels, use_identity, depth] + # AConv: ['aconv', out_channels] + # ADown: ['adown', out_channels] + - + - ['conv', 32, 3, 2] + - ['elan', 32, 32, false] + - + - ['aconv', 64] + - ['repncspelan', 64, 64, false, 3] + - + - ['aconv', 96] + - ['repncspelan', 96, 96, false, 3] + - + - ['aconv', 128] + - ['repncspelan', 128, 128, false, 3] + neck: + name: yolov9fpn + params: + repeat_num: 3 + act_type: *act_type + use_aux_loss: &use_aux_loss false + bu_type: aconv + spp_channels: 128 + n4_channels: 96 + p3_channels: 64 + p3_to_p4_channels: 48 + p4_channels: 96 + p4_to_p5_channels: 64 + p5_channels: 128 + head: + name: yolo_detection_head + params: + version: v9 + num_anchors: ~ + use_group: true + reg_max: &reg_max 16 + act_type: *act_type + use_aux_loss: *use_aux_loss + postprocessor: + params: + # postprocessor - decode + reg_max: *reg_max + score_thresh: 0.01 + # postprocessor - nms + nms_thresh: 0.65 + class_agnostic: false + losses: + - criterion: yolov9_loss + weight: ~ + l1_activate_epoch: ~ diff --git 
a/config/benchmark_examples/detection-coco2017-yolov9_tiny/training.yaml b/config/benchmark_examples/detection-coco2017-yolov9_tiny/training.yaml new file mode 100644 index 00000000..2721976c --- /dev/null +++ b/config/benchmark_examples/detection-coco2017-yolov9_tiny/training.yaml @@ -0,0 +1,23 @@ +training: + epochs: 500 + mixed_precision: True + max_norm: ~ + ema: + name: exp_decay + decay: 0.9999 + beta: 2000 + optimizer: + name: sgd + lr: 0.01 + momentum: 0.937 + weight_decay: 0.0005 # No bias and norm decay + nesterov: True + no_bias_decay: True + no_norm_weight_decay: True + overwrite: ~ + scheduler: + name: cosine_no_sgdr + warmup_epochs: 3 + warmup_bias_lr: 0.001 + min_lr: 0.0001 + end_epoch: 485 diff --git a/config/model/mobilenetv4/mobilenetv4-conv-medium-classification.yaml b/config/model/mobilenetv4/mobilenetv4-conv-medium-classification.yaml index 5301ebe9..6a896c19 100644 --- a/config/model/mobilenetv4/mobilenetv4-conv-medium-classification.yaml +++ b/config/model/mobilenetv4/mobilenetv4-conv-medium-classification.yaml @@ -21,7 +21,7 @@ model: norm_type: batch_norm act_type: relu return_stage_idx: ~ - layer_scale: 0.1 + layer_scale: ~ stage_params: # Conv2D: ['conv', out_channels, kernel_size, stride] # FusedIB: ['fi', out_channels, hidden_channels, kernel_size, stride] diff --git a/config/model/mobilenetv4/mobilenetv4-conv-small-classification.yaml b/config/model/mobilenetv4/mobilenetv4-conv-small-classification.yaml index a71d02a3..5114fd33 100644 --- a/config/model/mobilenetv4/mobilenetv4-conv-small-classification.yaml +++ b/config/model/mobilenetv4/mobilenetv4-conv-small-classification.yaml @@ -21,7 +21,7 @@ model: norm_type: batch_norm act_type: relu return_stage_idx: ~ - layer_scale: 0.1 + layer_scale: ~ stage_params: # Conv2D: ['conv', out_channels, kernel_size, stride] # FusedIB: ['fi', out_channels, hidden_channels, kernel_size, stride] diff --git a/config/model/mobilenetv4/mobilenetv4-hybrid-large-classification.yaml 
b/config/model/mobilenetv4/mobilenetv4-hybrid-large-classification.yaml index c5736965..13d2a0aa 100644 --- a/config/model/mobilenetv4/mobilenetv4-hybrid-large-classification.yaml +++ b/config/model/mobilenetv4/mobilenetv4-hybrid-large-classification.yaml @@ -21,7 +21,7 @@ model: norm_type: batch_norm act_type: gelu return_stage_idx: ~ - layer_scale: ~ + layer_scale: 0.1 stage_params: # Conv2D: ['conv', out_channels, kernel_size, stride] # FusedIB: ['fi', out_channels, hidden_channels, kernel_size, stride] diff --git a/config/model/yolo/yolov9_c-detection.yaml b/config/model/yolov9/yolov9_c-detection.yaml similarity index 98% rename from config/model/yolo/yolov9_c-detection.yaml rename to config/model/yolov9/yolov9_c-detection.yaml index 56893e9f..50cfb0dd 100644 --- a/config/model/yolo/yolov9_c-detection.yaml +++ b/config/model/yolov9/yolov9_c-detection.yaml @@ -2,7 +2,7 @@ model: task: detection name: yolov9_c checkpoint: - use_pretrained: false + use_pretrained: true load_head: false path: ~ optimizer_path: ~ diff --git a/config/model/yolo/yolov9_m-detection.yaml b/config/model/yolov9/yolov9_m-detection.yaml similarity index 98% rename from config/model/yolo/yolov9_m-detection.yaml rename to config/model/yolov9/yolov9_m-detection.yaml index dee08ea8..d8fa0088 100644 --- a/config/model/yolo/yolov9_m-detection.yaml +++ b/config/model/yolov9/yolov9_m-detection.yaml @@ -2,7 +2,7 @@ model: task: detection name: yolov9_m checkpoint: - use_pretrained: false + use_pretrained: true load_head: false path: ~ optimizer_path: ~ diff --git a/config/model/yolo/yolov9_s-detection.yaml b/config/model/yolov9/yolov9_s-detection.yaml similarity index 98% rename from config/model/yolo/yolov9_s-detection.yaml rename to config/model/yolov9/yolov9_s-detection.yaml index b041c1c6..6beafd85 100644 --- a/config/model/yolo/yolov9_s-detection.yaml +++ b/config/model/yolov9/yolov9_s-detection.yaml @@ -2,7 +2,7 @@ model: task: detection name: yolov9_s checkpoint: - use_pretrained: false + 
use_pretrained: true load_head: false path: ~ optimizer_path: ~ diff --git a/config/model/yolov9/yolov9_tiny-detection.yaml b/config/model/yolov9/yolov9_tiny-detection.yaml new file mode 100644 index 00000000..65bbd170 --- /dev/null +++ b/config/model/yolov9/yolov9_tiny-detection.yaml @@ -0,0 +1,72 @@ +model: + task: detection + name: yolov9_tiny + checkpoint: + use_pretrained: true + load_head: false + path: ~ + optimizer_path: ~ + freeze_backbone: false + architecture: + full: ~ # auto + backbone: + name: gelan + params: + stem_out_channels: 16 + stem_kernel_size: 3 + stem_stride: 2 + return_stage_idx: [1, 2, 3] + act_type: &act_type silu + stage_params: + # Conv2D: ['conv', out_channels, kernel_size, stride] + # ELAN: ['elan', out_channels, part_channels, use_identity] + # RepNCSPELAN: ['repncspelan', out_channels, part_channels, use_identity, depth] + # AConv: ['aconv', out_channels] + # ADown: ['adown', out_channels] + - + - ['conv', 32, 3, 2] + - ['elan', 32, 32, false] + - + - ['aconv', 64] + - ['repncspelan', 64, 64, false, 3] + - + - ['aconv', 96] + - ['repncspelan', 96, 96, false, 3] + - + - ['aconv', 128] + - ['repncspelan', 128, 128, false, 3] + neck: + name: yolov9fpn + params: + repeat_num: 3 + act_type: *act_type + use_aux_loss: &use_aux_loss false + bu_type: aconv + spp_channels: 128 + n4_channels: 96 + p3_channels: 64 + p3_to_p4_channels: 48 + p4_channels: 96 + p4_to_p5_channels: 64 + p5_channels: 128 + head: + name: yolo_detection_head + params: + version: v9 + num_anchors: ~ + use_group: true + reg_max: &reg_max 16 + act_type: *act_type + use_aux_loss: *use_aux_loss + postprocessor: + params: + # postprocessor - decode + reg_max: *reg_max + score_thresh: 0.01 + # postprocessor - nms + nms_thresh: 0.65 + class_agnostic: false + losses: + - criterion: yolov9_loss + weight: ~ + l1_activate_epoch: ~ diff --git a/docs/benchmarks/benchmarks.md b/docs/benchmarks/benchmarks.md index 0fec534b..e3f7e0f6 100644 --- a/docs/benchmarks/benchmarks.md +++ 
b/docs/benchmarks/benchmarks.md @@ -40,6 +40,10 @@ If you have a better recipe, please share with us anytime. We appreciate all eff |---|---|---|---|---|---|---|---|---|---|---| | COCO-val | [RT-DETR_res18*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/rtdetr/rtdetr-res18-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res18_coco.safetensors?versionId=uu9v49NI6rQx8wOY6bJbEXUFOG_R9xqH) | (640, 640) | 65.77 | 52.75 | 48.49 | 20.18M | 40.36G | Supported | No input z-norm, [lyuwenyu/RT-DETR](https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch) | | COCO-val | [RT-DETR_res50*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/rtdetr/rtdetr-res50-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res50_coco.safetensors?versionId=JHmnjY13BEflpnDCYPFJ1c17UwpqDrLQ) | (640, 640) | 72.64 | 59.50 | 54.73 | 42.94M | 138.36G | Supported | No input z-norm, [lyuwenyu/RT-DETR](https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch) | +| COCO-val | [yolov9-tiny](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolov9/yolov9_tiny-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_tiny_coco.safetensors) | (640, 640) | 50.03 | 38.63 | 36.02 | 2.44M | 9.99G | Supported | No input z-norm | +| COCO-val | [yolov9-s*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolov9/yolov9_s-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_s_coco.safetensors) | (640, 640) | 62.63 | 51.13 | 47.13 | 7.23M | 26.87G | Supported | No input z-norm, [YOLO](https://yolo-docs.readthedocs.io/en/latest/2_model_zoo/0_object_detection.html) | +| COCO-val | 
[yolov9-m*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolov9/yolov9_m-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_m_coco.safetensors) | (640, 640) | 67.43 | 56.13 | 51.72 | 20.12M | 77.08G | Supported | No input z-norm, [YOLO](https://yolo-docs.readthedocs.io/en/latest/2_model_zoo/0_object_detection.html) | +| COCO-val | [yolov9-c*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolov9/yolov9_c-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_c_coco.safetensors) | (640, 640) | 69.16 | 57.90 | 53.28 | 25.50M | 103.17G | Supported | No input z-norm, [YOLO](https://yolo-docs.readthedocs.io/en/latest/2_model_zoo/0_object_detection.html) | | COCO-val | [YOLOX-nano*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolox/yolox-nano-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolox/yolox_nano_coco.safetensors?versionId=JCXugDTwGegx9Kl6Jc5AMJpIkA.WlNVP) | (416, 416) | 41.30 | 27.90 | 26.33 | 0.91M | 1.08G | Supported | [Megvii-BaseDetection/YOLOX](https://github.com/Megvii-BaseDetection/YOLOX?tab=readme-ov-file#benchmark), conf_thresh=0.01, nms_thresh=0.65 | | COCO-val | [YOLOX-tiny*](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolox/yolox-tiny-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolox/yolox_tiny_coco.safetensors?versionId=lJp1bCEToD_6IaL9kRCqcYIwVZ.QQ.1P) | (416, 416) | 50.69 | 36.18 | 34.00 | 5.06M | 6.45G | Supported | [Megvii-BaseDetection/YOLOX](https://github.com/Megvii-BaseDetection/YOLOX?tab=readme-ov-file#benchmark), conf_thresh=0.01, nms_thresh=0.65 | | COCO-val | 
[YOLOX-s](https://github.com/Nota-NetsPresso/netspresso-trainer/blob/master/config/model/yolox/yolox-s-detection.yaml) | [download](https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolox/yolox_s_coco.safetensors?versionId=QRLqHKqhv8TSYBrmsQ3M8lCR8w7HEZyA) | (640, 640) | 58.56 | 44.10 | 40.63 | 8.97M | 26.81G | Supported | conf_thresh=0.01, nms_thresh=0.65 | diff --git a/docs/getting_started/installation/installation.md b/docs/getting_started/installation/installation.md index b8c42fdf..e0b88d05 100644 --- a/docs/getting_started/installation/installation.md +++ b/docs/getting_started/installation/installation.md @@ -2,8 +2,8 @@ ### Prerequisites -- Python `3.8` | `3.9` | `3.10` -- PyTorch `2.0.1` (recommended) (compatible with: `1.11.x` - `2.0.1`) +- Python `>=3.10` +- PyTorch `>=2.0.1` ### Install with pypi diff --git a/requirements.txt b/requirements.txt index 5ffe68ff..e00673bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -torch>=1.11.0,<=2.0.1 -torchvision>=0.12.0,<=0.15.2 +torch>=2.0.1 +torchvision onnx onnxruntime numpy diff --git a/src/netspresso_trainer/VERSION b/src/netspresso_trainer/VERSION index e21e727f..13175fdc 100644 --- a/src/netspresso_trainer/VERSION +++ b/src/netspresso_trainer/VERSION @@ -1 +1 @@ -1.4.0 \ No newline at end of file +1.4.1 \ No newline at end of file diff --git a/src/netspresso_trainer/models/utils.py b/src/netspresso_trainer/models/utils.py index df54087a..225ae43a 100644 --- a/src/netspresso_trainer/models/utils.py +++ b/src/netspresso_trainer/models/utils.py @@ -56,6 +56,10 @@ 'yolox_m': 'coco', 'yolox_l': 'coco', 'yolox_x': 'coco', + 'yolov9_tiny': 'coco', + 'yolov9_s': 'coco', + 'yolov9_c': 'coco', + 'yolov9_m': 'coco', 'rtdetr_res18': 'coco', 'rtdetr_res50': 'coco', 'yolo_fastest_v2': 'coco', @@ -134,11 +138,23 @@ 'yolox_x': { 'coco': 
"https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolox/yolox_x_coco.safetensors?versionId=NWskUEbSGviBWskHQ3P1dQZXnRXOR1WN", }, + 'yolov9_tiny': { + "coco": "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_tiny_coco.safetensors?versionId=lFU6CTU6CayTyETvHr4o8k_Sh26vRH2F", + }, + "yolov9_s": { + "coco": "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_s_coco.safetensors?versionId=EpMf6UaAZC0qwIRmQVR_mqeRObskt2PK", + }, + "yolov9_m": { + "coco": "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_m_coco.safetensors?versionId=jjmbLq_06YW1VyUPexSvl6KDOBQJpFd5" + }, + "yolov9_c": { + "coco": "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolov9/yolov9_c_coco.safetensors?versionId=TDZWEU8pi_c0ZHPS_U073BoqXaUFCviN", + }, 'rtdetr_res18': { - 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res18_coco.safetensors?versionId=uu9v49NI6rQx8wOY6bJbEXUFOG_R9xqH", + 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res18_coco.safetensors?versionId=9uegrNukkbp5ySO4vC52WPFhUEbEpEbD", }, 'rtdetr_res50': { - 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res50_coco.safetensors?versionId=JHmnjY13BEflpnDCYPFJ1c17UwpqDrLQ", + 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/rtdetr/rtdetr_res50_coco.safetensors?versionId=ZwwBP5C9CE2oRoBJy5Gjr7aTFMAb2hdz", }, 'yolo_fastest_v2': { 'coco': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/yolofastest/yolo_fastest_v2_coco.safetensors?versionId=CGhNjiZygGVjtHm0M586DzQ6.2FqWvl1" diff --git a/src/netspresso_trainer/pipelines/train.py b/src/netspresso_trainer/pipelines/train.py index cb42080e..32dd0fdf 100644 --- 
a/src/netspresso_trainer/pipelines/train.py +++ b/src/netspresso_trainer/pipelines/train.py @@ -35,6 +35,7 @@ from ..losses.builder import LossFactory from ..metrics.builder import MetricFactory from ..utils.checkpoint import load_checkpoint, save_checkpoint +from ..utils.exir import save_exir from ..utils.fx import save_graphmodule from ..utils.logger import yaml_for_logging from ..utils.model_ema import ModelEMA @@ -355,6 +356,12 @@ def save_best(self): sample_input=self.sample_input.type(save_dtype), opset_version=opset_version) logger.info(f"ONNX model converting and saved at {str(model_save_path.with_suffix('.onnx'))}") + + save_exir(best_model, + model_save_path.with_suffix('.exir'), + sample_input=self.sample_input.type(save_dtype)) + logger.info(f"EXIR model converting and saved at {str(model_save_path.with_suffix('.exir'))}") + if self.logger.use_mlflow: self.logger.mlflow_logger.log_onnx_model(model_save_path.with_suffix('.onnx'), input_example=self.sample_input.type(save_dtype)) diff --git a/src/netspresso_trainer/utils/exir.py b/src/netspresso_trainer/utils/exir.py new file mode 100644 index 00000000..4dda99a1 --- /dev/null +++ b/src/netspresso_trainer/utils/exir.py @@ -0,0 +1,36 @@ +# Copyright (C) 2024 Nota Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# ---------------------------------------------------------------------------- +from pathlib import Path +from typing import Union + +import torch +import torch.nn as nn +from loguru import logger +from torch import Tensor + +from .environment import get_device + +__all__ = ['save_exir'] + + +def save_exir(model: nn.Module, f: Union[str, Path], sample_input: Tensor): + if not hasattr(torch, 'export'): + logger.warning("Current torch version does not support torch.export. Please upgrade torch.") + return + sample_input = sample_input.to(get_device(model)) + exported_program = torch.export.export(model, (sample_input, )) + torch.export.save(exported_program, f) + return exported_program diff --git a/src/netspresso_trainer/utils/logger.py b/src/netspresso_trainer/utils/logger.py index d167b122..d3c7412b 100644 --- a/src/netspresso_trainer/utils/logger.py +++ b/src/netspresso_trainer/utils/logger.py @@ -40,7 +40,7 @@ def rank_filter(record): try: return dist.get_rank() == 0 - except RuntimeError: # Default process group has not been initialized, please make sure to call init_process_group. + except (RuntimeError, ValueError): # Default process group has not been initialized, please make sure to call init_process_group. return True def get_format(level: str, distributed: bool = False): diff --git a/tools/exir_convert.py b/tools/exir_convert.py new file mode 100644 index 00000000..ffaee37d --- /dev/null +++ b/tools/exir_convert.py @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Nota Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# ---------------------------------------------------------------------------- + +import argparse +import os +from itertools import chain +from pathlib import Path +from typing import List + +import torch +import torch.nn as nn +from netspresso_trainer.models import build_model, is_single_task_model +from netspresso_trainer.utils.exir import save_exir +from omegaconf import OmegaConf + + + +TEMP_NUM_CLASSES = 80 + + +def parse_args(): + parser = argparse.ArgumentParser(description="Parser for NetsPresso Export conversion") + + parser.add_argument( + '-c', '--config-path', type=str, default="config/model/yolox/yolox-s-detection.yaml", + help="Model config path") + parser.add_argument( + '-n', '--num-classes', type=int, default=TEMP_NUM_CLASSES, + help="Number of classes") + parser.add_argument( + '-o', '--output-dir', type=str, default="exir/", + help="Export model output directory") + parser.add_argument( + '--sample-size', type=int, nargs=2, default=(640, 640), + help="Input sample size") + parser.add_argument( + '--debug', action='store_true', help="Debug mode to check with the error message") + + args, _ = parser.parse_known_args() + return args + + +def get_model_config_path_list(config_path_or_dir: Path) -> List[Path]: + if config_path_or_dir.is_dir(): + config_dir = config_path_or_dir + return sorted(chain(config_dir.glob("*.yaml"), config_dir.glob("*.yml"))) + config_path = config_path_or_dir + return [config_path] + + +if __name__ == '__main__': + args = parse_args() + + config_path_list = get_model_config_path_list(Path(args.config_path)) + os.makedirs(args.output_dir, exist_ok=True) + + for model_config_path in config_path_list: + try: + print(f"Export conversion for ({model_config_path})..... 
", end='', flush=True) + config = OmegaConf.load(model_config_path) + config = config.model + config.single_task_model = is_single_task_model(config) + torch_model: nn.Module = build_model(config, num_classes=args.num_classes, devices=torch.device("cpu"), distributed=False) + torch_model.eval() + sample_input = torch.randn(1, 3, *args.sample_size) + save_exir(torch_model, + f=Path(args.output_dir) / f"{model_config_path.stem}.exir", + sample_input=sample_input) + print("Success!") + except KeyboardInterrupt: + print("") + break + except Exception as e: + print("Failed!") + if args.debug: + raise e + print(e)