`OnlineMixingDataset` can be imported easily and integrated into existing training loops with minimal changes. A sample custom training loop implementation can be found [here](./artifacts/custom_loop_usage.py). The sample uses two instruction-tuning datasets and trains the `ibm-granite/granite-3.1-2b-instruct` model on the next-token prediction task.
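To illustrate the core idea behind such a mixing dataset, here is a minimal, self-contained sketch. The class name, constructor arguments, and the EXP3-style weight update below are illustrative assumptions, not the actual `OnlineMixingDataset` API; it only shows how a stream can pick a source dataset every few samples using bandit-maintained weights.

```python
import math
import random

class ToyMixingStream:
    """Illustrative sketch (NOT the real OnlineMixingDataset API): a stream
    that re-picks the source category every `sampling_interval` samples,
    using weights maintained by an EXP3-style multi-armed bandit."""

    def __init__(self, datasets, sampling_interval=4, lr=0.1, seed=0):
        # Arms are sorted by name; indices start at 0.
        self.names = sorted(datasets)
        self.iters = {name: iter(datasets[name]) for name in self.names}
        self.weights = [0.0] * len(self.names)  # log-weights of the bandit
        self.sampling_interval = sampling_interval
        self.lr = lr
        self.rng = random.Random(seed)
        self.arm_idx = 0
        self.produced = 0

    def _ratios(self):
        # Softmax over log-weights gives the sampling ratios.
        z = [math.exp(w) for w in self.weights]
        s = sum(z)
        return [v / s for v in z]

    def update(self, reward):
        # "update" action: push the last-sampled arm toward higher reward.
        self.weights[self.arm_idx] += self.lr * reward

    def __iter__(self):
        return self

    def __next__(self):
        if self.produced % self.sampling_interval == 0:
            # "sample" action: weighted random choice of the next category.
            self.arm_idx = self.rng.choices(
                range(len(self.names)), weights=self._ratios())[0]
        self.produced += 1
        return next(self.iters[self.names[self.arm_idx]])
```

In a training loop this would be driven by alternating `next(stream)` to fetch batches and `stream.update(reward)` after computing a reward signal from the model's outputs.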
## Metrics
All metrics related to online data mixing are logged to the `odm.jsonl` file in the checkpoint output directory.
Metric | Description
--|--
`samples_produced_so_far` | Total number of samples produced by the dataset at the time of logging.
`sampling_interval` | Sampling interval "n": at every "n" samples, a category/dataset is chosen by weighted random sampling, where the weights are provided by the Multi-Armed Bandit (MAB) algorithm.
`total_categories` | Total number of categories or datasets involved in mixing.
`current_sampling_weights` | State of the sampling weights at the time of logging.
`current_sampling_ratio` | State of the sampling ratios at the time of logging.
`arm_idx` | Index of the last sampled category. Categories/datasets are sorted in ascending order by name; indices start at 0, and each index corresponds to its respective category/dataset.
`category_level_counts_so_far` | Per-dataset split of the sample count at the time of logging.
`rewards` | State of the rewards at the time of logging, i.e. the most recently provided reward for each dataset.
`action` | The action that took place at the time of logging: either "update" or "sample", corresponding to a weight update of the MAB algorithm or to category sampling, respectively.
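Since each line of `odm.jsonl` is a JSON record, the log is easy to inspect programmatically. The field names below come from the metrics table; the record layout is otherwise an assumption about what a run produces, so treat this as a sketch.

```python
import json

def load_sampling_ratio_history(path):
    """Return (samples_produced_so_far, current_sampling_ratio) pairs
    from "sample" actions, e.g. to plot how the mixture evolves.

    Field names follow the metrics table; the exact record layout
    is an assumption, not guaranteed by the library."""
    history = []
    with open(path) as f:
        for line in f:
            rec = json.loads(line)
            if rec.get("action") == "sample":
                history.append((rec["samples_produced_so_far"],
                                rec["current_sampling_ratio"]))
    return history
```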
## Rewards
Below are the currently available rewards. We are constantly looking to improve the existing rewards and add new ones, and we encourage users to identify rewards that help their use cases.
Rewards | Description
--|--
`ENTROPY` | Shannon entropy of the logits, averaged across all tokens. Higher entropy means the model requires more samples from that dataset/category.
`ENTROPY3_VARENT1` | Combination of 3 parts Shannon entropy and 1 part variance of the entropy. Higher values mean more samples are required.
`ENTROPY_LAST_TOKEN` | Shannon entropy of the last token in the sample. Higher values mean more samples are required.
`TRAIN_LOSS` | Training loss, maintained per category and updated with the latest loss of the sampled dataset/category. Higher values mean more samples are required.
`VALIDATION_LOSS` | Validation loss per category, computed using an evaluation dataset from each category. Higher values mean more samples are required.
`GRADNORM` | Gradient norm, maintained per category and updated with the latest values of the sampled dataset/category. Higher values mean fewer samples should come from that particular dataset/category.
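As a concrete reference for the entropy-based rewards, the sketch below computes Shannon entropy of a model's output distribution, averaged across tokens. The softmax/entropy math is standard; everything else (function names, plain-list logits instead of tensors) is illustrative, not the library's implementation.

```python
import math

def softmax(logits):
    """Numerically stable softmax over a list of logits."""
    m = max(logits)
    exps = [math.exp(x - m) for x in logits]
    s = sum(exps)
    return [e / s for e in exps]

def mean_token_entropy(logits_per_token):
    """Average Shannon entropy (in nats) over a sequence of per-token logits.
    Illustrative sketch of the ENTROPY-style reward: higher values suggest
    the model is more uncertain on this category's data."""
    total = 0.0
    for logits in logits_per_token:
        probs = softmax(logits)
        total += -sum(p * math.log(p) for p in probs if p > 0)
    return total / len(logits_per_token)
```

A uniform distribution over a vocabulary of size V yields the maximum entropy ln(V), while a sharply peaked distribution yields entropy near zero.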
### Adding a Custom Reward
Custom rewards can be added by extending the `compute_reward` function and adding a new member to the `Reward` enum. If the custom reward requires specific information from the training loop, then the `_extract_information_from_state_for_reward` function, a member function of `OnlineMixingDataset`, has to be extended to extract that information from the trainer state.
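The shape of such an extension might look like the following. Only the names `Reward`, `compute_reward`, and `_extract_information_from_state_for_reward` come from the text above; the enum members shown, the function signature, and the sequence-length reward itself are hypothetical, for illustration only.

```python
from enum import Enum

class Reward(Enum):
    # Hypothetical enum values -- the real enum lives in the library source.
    TRAIN_LOSS = "train_loss"
    SEQ_LENGTH = "seq_length"   # our custom reward

def compute_reward(reward_type, info):
    """Hypothetical dispatch: `info` stands in for whatever
    _extract_information_from_state_for_reward pulls from trainer state."""
    if reward_type is Reward.TRAIN_LOSS:
        # Higher loss => more samples from this category.
        return info["loss"]
    if reward_type is Reward.SEQ_LENGTH:
        # Custom reward: favor categories still producing long sequences.
        return info["seq_len"] / info["max_seq_len"]
    raise NotImplementedError(reward_type)
```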
### Planned TODOs
Please see issue [#153](https://github.com/foundation-model-stack/fms-acceleration/issues/153).