Skip to content

Commit eb493b5

Browse files
Merge branch 'main' into feat/chattemplatebase64
2 parents 87716f0 + a84b716 commit eb493b5

37 files changed

Lines changed: 294716 additions & 128 deletions

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ notes-rgx=
475475
[REFACTORING]
476476

477477
# Maximum number of nested blocks for function / method body
478-
max-nested-blocks=5
478+
max-nested-blocks=6
479479

480480
# Complete name of functions that never returns. When checking for
481481
# inconsistent-return-statements if a never returning function is called then

README.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,9 @@ Notes:
855855
- When a boolean is passed, the expert parallel degree defaults to 1 and further the behaviour would be as follows:
856856
- if True, it is Scatter MoE Kernels with experts sharded based on the top level sharding protocol (e.g. FSDP).
857857
- if False, Scatter MoE Kernels with complete replication of experts across ranks.
858+
- FSDP must be used for LoRA tuning with `--fast_moe`.
859+
- LoRA tuning with ScatterMoE is supported, but because of inference restrictions in vLLM/vanilla PEFT, the expert layers and the router linear layer should not be included in `target_modules` for models being tuned with ScatterMoE. Users have control over which `target_modules` they wish to train:
860+
- At this time, only attention layers are trainable when using LoRA with ScatterMoE. Until support for the router linear layer is added, target modules must be specified explicitly (i.e., `target_modules: ["q_proj", "v_proj", "o_proj", "k_proj"]`) instead of passing `target_modules: ["all-linear"]`.
858861
- `world_size` must be divisible by the `ep_degree`
859862
- `number of experts` in the MoE module must be divisible by the `ep_degree`
860863
- Running fast moe modifies the state dict of the model, so checkpoints must be post-processed. This happens automatically, and the converted checkpoint can be found in the `hf_converted_checkpoint` folder within every saved checkpoint directory. Alternatively, the same conversion can be performed manually using the [checkpoint utils](https://github.com/foundation-model-stack/fms-acceleration/blob/main/plugins/accelerated-moe/src/fms_acceleration_moe/utils/checkpoint_utils.py) script.
@@ -916,12 +919,12 @@ For information on supported dataset formats and how to tune a vision-language m
916919

917920
? May be supported, but not tested
918921

919-
Model Name & Size | Model Architecture | Full Finetuning |
920-
-------------------- | ---------------- | --------------- |
921-
Llama 3.2-11B Vision | MllamaForConditionalGeneration | ✅* |
922-
Llava 1.5-7B | LlavaForConditionalGeneration | ✅* |
923-
Granite 3.1-2B Vision | LlavaNextForConditionalGeneration | ✅* |
924-
Llava Mistral 1.6-7B | LlavaNextForConditionalGeneration | ✅* |
922+
Model Name & Size | Model Architecture | LoRA Tuning | Full Finetuning |
923+
-------------------- | ---------------- | --------------- | --------------- |
924+
Llama 3.2-11B Vision | MllamaForConditionalGeneration | ✅* | ✅* |
925+
Llava 1.5-7B | LlavaForConditionalGeneration | ✅* | ✅* |
926+
Granite 3.1-2B Vision | LlavaNextForConditionalGeneration | ✅* | ✅* |
927+
Llava Mistral 1.6-7B | LlavaNextForConditionalGeneration | ✅* | ✅* |
925928

926929
(*) - Supported with `fms-hf-tuning` v2.8.0 or later.
927930

build/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ RUN --mount=type=cache,target=/home/${USER}/.cache/pip,uid=${USER_UID} \
149149
python -m pip install --user wheel && \
150150
python -m pip install --user "$(head bdist_name)" && \
151151
python -m pip install --user "$(head bdist_name)[flash-attn]" && \
152-
python -m pip install --user "$(head bdist_name)[mamba]"
152+
python -m pip install --user --no-build-isolation "$(head bdist_name)[mamba]"
153153

154154
# fms_acceleration_peft = PEFT-training, e.g., 4bit QLoRA
155155
# fms_acceleration_foak = Fused LoRA and triton kernels

build/accelerate_launch.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,17 @@ def main():
146146
save_model_dir, save_model_dir, num_added_tokens
147147
)
148148

149+
# In case of ScatterMoE LoRA
150+
hf_converted_checkpoint = os.path.join(
151+
save_model_dir, "hf_converted_checkpoint"
152+
)
153+
if os.path.exists(
154+
os.path.join(hf_converted_checkpoint, "adapter_model.safetensors")
155+
):
156+
post_process_vLLM_adapters_new_tokens(
157+
hf_converted_checkpoint, hf_converted_checkpoint, num_added_tokens
158+
)
159+
149160
if (
150161
os.path.exists(os.path.join(output_dir, "added_tokens_info.json"))
151162
and job_config.get("save_strategy") != "no"
@@ -159,11 +170,30 @@ def main():
159170
for _, dirs, _ in os.walk(output_dir, topdown=False):
160171
for name in dirs:
161172
if "checkpoint-" in name.lower():
162-
post_process_vLLM_adapters_new_tokens(
163-
os.path.join(output_dir, name),
164-
os.path.join(output_dir, name),
165-
num_added_tokens,
173+
base_checkpoint_dir = os.path.join(output_dir, name)
174+
hf_converted_checkpoint = os.path.join(
175+
base_checkpoint_dir, "hf_converted_checkpoint"
176+
)
177+
178+
# Use hf_converted_checkpoint if exists, otherwise use base_checkpoint_dir
179+
checkpoint_dir = (
180+
hf_converted_checkpoint
181+
if os.path.exists(
182+
os.path.join(
183+
hf_converted_checkpoint, "adapter_model.safetensors"
184+
)
185+
)
186+
else base_checkpoint_dir
166187
)
188+
189+
if os.path.exists(
190+
os.path.join(checkpoint_dir, "adapter_model.safetensors")
191+
):
192+
post_process_vLLM_adapters_new_tokens(
193+
checkpoint_dir,
194+
checkpoint_dir,
195+
num_added_tokens,
196+
)
167197
else:
168198
logging.warning(
169199
"Failed to post-process: file added_tokens_info.json not in path %s",

docs/offline-data-preprocessing.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ python scripts/offline_data_processing.py \
3737

3838
Additionally, once the offline data processing is complete, users can leverage the shards stored in `output_dir` for tuning by passing it through the `--training_data_path` flag or passing it via `data_paths` argument in data config yaml, provided they find the sharded datasets beneficial for training.
3939

40+
**NOTE**: The offline data preprocessing script is not compatible with processing image datasets for vision models.
41+
4042
## Example Usage
4143
### Applying Chat Template
4244

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ dependencies = [
3434
"sentencepiece>=0.1.99,<0.3",
3535
"tokenizers>=0.13.3,<1.0",
3636
"tqdm>=4.66.2,<5.0",
37-
"trl>=0.13,<0.17",
37+
"trl>=0.13,<0.18",
3838
"peft>=0.8.0,<0.14",
3939
"protobuf>=5.28.0,<6.0.0",
4040
"datasets>=2.15.0,<4.0",

tests/acceleration/test_acceleration_framework.py

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -532,8 +532,8 @@ def test_framework_initialized_properly_moe():
532532
)
533533

534534
# spy inside the train to ensure that the ilab plugin is called
535-
assert spy["model_loader_calls"] == 1
536-
assert spy["augmentation_calls"] == 0
535+
assert spy["model_loader_calls"] == 0
536+
assert spy["augmentation_calls"] == 1
537537
assert spy["get_ready_for_train_calls"] == 1
538538

539539

@@ -776,37 +776,34 @@ def test_error_raised_fast_moe_with_non_moe_model():
776776
"""
777777
Ensure error is thrown when `--fast_moe` is passed and model is not MoE
778778
"""
779-
with pytest.raises(
780-
AttributeError,
781-
match="'LlamaConfig' object has no attribute 'num_local_experts'",
782-
):
783-
with tempfile.TemporaryDirectory() as tempdir:
779+
with tempfile.TemporaryDirectory() as tempdir:
784780

785-
model_args = copy.deepcopy(MODEL_ARGS)
786-
model_args.model_name_or_path = "TinyLlama/TinyLlama-1.1B-Chat-v0.3"
787-
model_args.torch_dtype = torch.bfloat16
788-
train_args = copy.deepcopy(TRAIN_ARGS)
789-
train_args.output_dir = tempdir
790-
train_args.save_strategy = "no"
791-
train_args.bf16 = True
792-
data_args = copy.deepcopy(DATA_ARGS)
793-
data_args.training_data_path = TWITTER_COMPLAINTS_JSON_FORMAT
794-
data_args.response_template = "\n\n### Label:"
795-
data_args.dataset_text_field = "output"
781+
model_args = copy.deepcopy(MODEL_ARGS)
782+
model_args.model_name_or_path = "TinyLlama/TinyLlama-1.1B-Chat-v0.3"
783+
model_args.torch_dtype = torch.bfloat16
784+
train_args = copy.deepcopy(TRAIN_ARGS)
785+
train_args.output_dir = tempdir
786+
train_args.save_strategy = "no"
787+
train_args.bf16 = True
788+
data_args = copy.deepcopy(DATA_ARGS)
789+
data_args.training_data_path = TWITTER_COMPLAINTS_JSON_FORMAT
790+
data_args.response_template = "\n\n### Label:"
791+
data_args.dataset_text_field = "output"
796792

797-
# initialize a config
798-
moe_config = FastMoeConfig(fast_moe=FastMoe(ep_degree=1))
793+
# initialize a config
794+
moe_config = FastMoeConfig(fast_moe=FastMoe(ep_degree=1))
799795

800-
# 1. mock a plugin class
801-
# 2. register the mocked plugins
802-
# 3. call sft_trainer.train
803-
with build_framework_and_maybe_instantiate(
804-
[
805-
(["training.moe.scattermoe"], ScatterMoEAccelerationPlugin),
806-
],
807-
instantiate=False,
808-
):
809-
with instantiate_model_patcher():
796+
# 1. mock a plugin class
797+
# 2. register the mocked plugins
798+
# 3. call sft_trainer.train
799+
with build_framework_and_maybe_instantiate(
800+
[
801+
(["training.moe.scattermoe"], ScatterMoEAccelerationPlugin),
802+
],
803+
instantiate=False,
804+
):
805+
with instantiate_model_patcher():
806+
with pytest.raises((ValueError, AttributeError)):
810807
sft_trainer.train(
811808
model_args,
812809
data_args,

tests/artifacts/testdata/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
CHAT_DATA_MULTI_TURN_GRANITE_3_1B = os.path.join(
7575
JSONL_DATA_DIR, "multi_turn_chat_granite_instruct.jsonl"
7676
)
77+
IMAGE_DATASET = os.path.join(JSONL_DATA_DIR, "image_dataset.jsonl")
7778
EMPTY_DATA = os.path.join(JSON_DATA_DIR, "empty_data.json")
7879
MALFORMATTED_DATA = os.path.join(JSON_DATA_DIR, "malformatted_data.json")
7980

tests/artifacts/testdata/jsonl/image_dataset.jsonl

Lines changed: 2 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright The FMS HF Tuning Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Helpful saved vision models for unit tests.
16+
"""
17+
# Standard
18+
import os
19+
20+
### Constants used for model path
21+
PREDEFINED_MODEL_PATH = os.path.join(os.path.dirname(__file__))
22+
TINY_LLAMA_VISION_MODEL_NAME = os.path.join(
23+
PREDEFINED_MODEL_PATH, "tiny_llama_vision_model"
24+
)
25+
TINY_GRANITE_VISION_MODEL_NAME = os.path.join(
26+
PREDEFINED_MODEL_PATH, "tiny_granite_vision_model"
27+
)

0 commit comments

Comments
 (0)