NVIDIA
diff --git a/‎recipes/vit/.gitignore‎
Lines changed: 7 additions & 0 deletions b/‎recipes/vit/.gitignore‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎recipes/vit/.ruff.toml‎
Lines changed: 4 additions & 0 deletions b/‎recipes/vit/.ruff.toml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎recipes/vit/Dockerfile‎
Lines changed: 9 additions & 0 deletions b/‎recipes/vit/Dockerfile‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎recipes/vit/README.md‎
Lines changed: 47 additions & 0 deletions b/‎recipes/vit/README.md‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎recipes/vit/config/defaults.yaml‎
Lines changed: 123 additions & 0 deletions b/‎recipes/vit/config/defaults.yaml‎
Lines changed: 123 additions & 0 deletions
diff --git a/‎recipes/vit/config/vit_base_patch16_224.yaml‎
Lines changed: 97 additions & 0 deletions b/‎recipes/vit/config/vit_base_patch16_224.yaml‎
Lines changed: 97 additions & 0 deletions
diff --git a/‎recipes/vit/config/vit_te_base_patch16_224.yaml‎
Lines changed: 12 additions & 0 deletions b/‎recipes/vit/config/vit_te_base_patch16_224.yaml‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎recipes/vit/data/super-tiny-imagenet-5/train/n03404251/images/n03404251_0.JPEG‎
2.3 KB b/‎recipes/vit/data/super-tiny-imagenet-5/train/n03404251/images/n03404251_0.JPEG‎
2.3 KB
diff --git a/‎recipes/vit/data/super-tiny-imagenet-5/train/n03404251/images/n03404251_1.JPEG‎
2.46 KB b/‎recipes/vit/data/super-tiny-imagenet-5/train/n03404251/images/n03404251_1.JPEG‎
2.46 KB
diff --git a/‎recipes/vit/data/super-tiny-imagenet-5/train/n03404251/images/n03404251_10.JPEG‎
2.04 KB b/‎recipes/vit/data/super-tiny-imagenet-5/train/n03404251/images/n03404251_10.JPEG‎
2.04 KB
@@ -0,0 +1,7 @@
+__pycache__/
+.ruff_cache/
+checkpoints/
+outputs/
+wandb/
+*.tar*
+*.zip
@@ -0,0 +1,4 @@
+extend = "../.ruff.toml"  # start from the ruff settings one directory up
+[lint]
+ignore = ["RUF", "D", "N", "E", "PLW", "PERF", "C", "F"]  # rule prefixes switched off for this recipe
+per-file-ignores = { "tokenizer_auto" = ["ALL"] }  # all rules are skipped for the "tokenizer_auto" pattern
@@ -0,0 +1,9 @@
+FROM nvcr.io/nvidia/pytorch:25.06-py3
+# Install requirements.txt; the file, the pip cache and the netrc secret are mounted for this RUN only.
+RUN --mount=type=bind,source=requirements.txt,target=/requirements.txt \
+    --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=secret,id=netrc,target=/root/.netrc \
+    PIP_CONSTRAINT= pip install -r /requirements.txt
+# Copy the build context into /workspace, which is also the working directory.
+WORKDIR /workspace
+COPY . .
@@ -0,0 +1,123 @@
+model:  # baseline values; configs that list `defaults` ahead of `_self_` layer their own on top
+  vit:
+    img_size: 224
+    patch_size: 16
+    in_chans: 3
+    num_classes: 100000
+    global_pool: 'token'
+    embed_dim: 768
+    depth: 12
+    num_heads: 12
+    mlp_ratio: 4.0
+    qkv_bias: true
+    qk_norm: false
+    scale_attn_norm: false
+    scale_mlp_norm: false
+    proj_bias: true
+    init_values: null
+    class_token: true
+    pos_embed: true
+    no_embed_class: false
+    reg_tokens: 0
+    pre_norm: false
+    final_norm: true
+    fc_norm: null
+    pool_include_prefix: false
+    drop_rate: 0.0  # all six *_rate values are 0.0 in the baseline
+    pos_drop_rate: 0.0
+    patch_drop_rate: 0.0
+    proj_drop_rate: 0.0
+    attn_drop_rate: 0.0
+    drop_path_rate: 0.0
+    weight_init: 'timm'
+    init_variance_rescale: false
+  transformer_engine: false
+  channels_last: false
+
+optimizer:
+  lr: 1.0e-4  # keep the ".0": YAML 1.1 loaders (e.g. PyYAML) read a bare 1e-4 as a string
+  betas: [0.9, 0.98]
+  eps: 1.0e-8  # same float spelling rule as lr
+  weight_decay: 0.01
+
+distributed:  # all four sizes are 1 in the baseline
+  dp_inter: 1
+  dp_shard: 1
+  cp: 1
+  tp: 1
+
+fsdp:
+  init_model_with_meta_device: true
+  zero_dp_strategy: 'optim_grads_params'
+  fsdp_unit_modules:  # module classes, written as dotted names
+    - vit.Block
+    - vit.PatchEmbed
+    - torch.nn.LayerNorm
+    - torch.nn.Linear
+  use_hybrid_fsdp: true
+  outer_dp_sharding_strategy: 'optim'
+  grad_reduce_in_fp32: false
+  preserve_fp32_weights: true
+
+training:
+  steps: 500
+  val_interval: 25
+  log_interval: 5
+  checkpoint:  # both keys are null in the baseline
+    path: null
+    resume_from_metric: null
+
+dataset:
+  num_workers: 0
+  train:
+    root: null  # data locations are null in the baseline
+    class_map: null
+    label_map: null
+    class_filter: null
+    batch_size: 1
+    shuffle: false
+    transform_kwargs:
+      img_size: 224
+      scale: null
+      ratio: null
+      train_crop_mode: null
+      hflip: 0.5
+      vflip: 0.0
+      color_jitter: 0.4
+      color_jitter_prob: null
+      grayscale_prob: 0.0
+      gaussian_blur_prob: 0.0
+      interpolation: 'random'
+      re_prob: 0.0
+      re_mode: 'const'
+      re_count: 1
+      re_num_splits: 0
+      normalize: true
+      separate: false
+      patch_size: 16
+      patchify: false
+  val:
+    root: null
+    class_map: null
+    label_map: null
+    class_filter: null
+    batch_size: 1
+    shuffle: false
+    transform_kwargs:
+      img_size: 224
+      crop_pct: null
+      crop_mode: null
+      crop_border_pixels: null
+      interpolation: 'bilinear'
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      normalize: true
+      patch_size: 16
+      patchify: false
+
+random:
+  seed: 42
+
+profiling:  # both switches are off in the baseline
+  torch_memory_profile: false
+  wandb: false
@@ -0,0 +1,97 @@
+defaults:
+  - defaults  # baseline values from defaults.yaml
+  - _self_  # this file comes last, so its keys take precedence
+
+model:
+  vit:
+    img_size: 224
+    patch_size: 16
+    in_chans: 3
+    num_classes: 100000
+    global_pool: 'map'
+    embed_dim: 768
+    depth: 12
+    num_heads: 12
+    mlp_ratio: 4.0
+    qkv_bias: true
+    qk_norm: true
+    scale_attn_norm: true
+    scale_mlp_norm: true
+    proj_bias: true
+    init_values: null
+    class_token: true
+    pos_embed: true
+    no_embed_class: false
+    reg_tokens: 8
+    pre_norm: true
+    final_norm: true
+    fc_norm: true
+    pool_include_prefix: false
+    drop_rate: 0.05  # all six *_rate values are 0.05 in this recipe
+    pos_drop_rate: 0.05
+    patch_drop_rate: 0.05
+    proj_drop_rate: 0.05
+    attn_drop_rate: 0.05
+    drop_path_rate: 0.05
+    weight_init: null
+    init_variance_rescale: true
+  transformer_engine: false
+  channels_last: false
+
+distributed:
+  dp_inter: 1
+  dp_shard: 1
+  cp: 1
+  tp: 1
+
+fsdp:
+  init_model_with_meta_device: true
+  zero_dp_strategy: 3  # NOTE(review): defaults.yaml uses a name here ('optim_grads_params'); confirm 3 is the same level
+  fsdp_unit_modules:
+    - vit.Block
+    - vit.PatchEmbed
+    - torch.nn.LayerNorm
+    - torch.nn.Linear
+  use_hybrid_fsdp: true
+  outer_dp_sharding_strategy: 1  # NOTE(review): defaults.yaml uses a name here ('optim'); confirm 1 is the same level
+  grad_reduce_in_fp32: false
+  preserve_fp32_weights: true
+
+training:
+  steps: 500
+  val_interval: 25
+  log_interval: 5
+  checkpoint:
+    path: './checkpoints/vit'
+    resume_from_metric: '-'  # '+' = highest metric (score), '-' = lowest metric (loss)
+
+dataset:
+  num_workers: 4
+  train:  # no transform_kwargs here; those keys are only set in defaults.yaml
+    root: './data/super-tiny-imagenet-5/train'
+    class_map: './data/super-tiny-imagenet-5/words.txt'
+    label_map: null  # unused for training: labels come from the directory layout
+    class_filter: null
+    batch_size: 5
+    shuffle: true
+  val:
+    root: './data/super-tiny-imagenet-5/val'
+    class_map: './data/super-tiny-imagenet-5/words.txt'
+    label_map: './data/super-tiny-imagenet-5/val/val_annotations.txt'
+    class_filter: null
+    batch_size: 5
+    shuffle: false
+
+random:
+  seed: 42
+
+profiling:
+  torch_memory_profile: false
+  torch_memory_profile_kwargs:
+    max_entries: 250000
+  wandb: false
+  wandb_kwargs:
+    # WandB needs WANDB_API_KEY=<your_api_key> exported in the environment.
+    name: 'bionemo-vit'
+    project: 'bionemo-recipes'
+    dir: null
@@ -0,0 +1,12 @@
+defaults:
+  - defaults  # NOTE(review): vit_base_patch16_224 already lists this; possibly redundant here — confirm
+  - vit_base_patch16_224
+  - _self_  # this file comes last, so its keys take precedence
+
+model:
+  transformer_engine: true  # vit_base_patch16_224 sets this to false
+
+training:
+  checkpoint:
+    path: './checkpoints/vit_te'
+    resume_from_metric: '-'  # '+' = highest metric (score), '-' = lowest metric (loss)