diff --git a/docs/tuning-techniques.md b/docs/tuning-techniques.md index c7afab1979..8428b3f83b 100644 --- a/docs/tuning-techniques.md +++ b/docs/tuning-techniques.md @@ -24,7 +24,7 @@ ## LoRA Tuning Example -Set `peft_method` to `"lora"`. You can additionally pass any arguments from [LoraConfig](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/tuning/config/peft_config.py#L21). +Set `peft_method` to `"lora"`. You can additionally pass any arguments from [LoraConfig](https://huggingface.co/docs/peft/en/package_reference/lora#peft.LoraConfig). ```py # Args you can pass r: int =8 @@ -340,7 +340,7 @@ You can see details on a sample configuration of Accelerated GPTQ-LoRA [here](ht To use GPTQ-LoRA technique, you can set the `quantized_lora_config` defined [here](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/tuning/config/acceleration_configs/quantized_lora_config.py). See the Notes section of FMS Acceleration doc [below](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/README.md#fms-acceleration) for usage. The only kernel we are supporting currently is `triton_v2`. -In addition, LoRA tuning technique is required to be used, set `peft_method` to `"lora"` and pass any arguments from [LoraConfig](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/tuning/config/peft_config.py#L21). +In addition, the LoRA tuning technique must be used: set `peft_method` to `"lora"` and pass any arguments from [LoraConfig](https://huggingface.co/docs/peft/en/package_reference/lora#peft.LoraConfig). 
Example command to run: diff --git a/tuning/config/peft_config.py b/tuning/config/peft_config.py index 27af853327..48f594393d 100644 --- a/tuning/config/peft_config.py +++ b/tuning/config/peft_config.py @@ -15,9 +15,10 @@ # Standard from dataclasses import dataclass, field from enum import Enum -from typing import List +from typing import List, Optional # Third Party +from peft import LoraConfig as HFLoraConfig from transformers.utils.quantization_config import Mxfp4Config as HfMxfp4Config @@ -40,49 +41,125 @@ def to_hf_config(self): @dataclass -class LoraConfig: +class LoraConfig(HFLoraConfig): """ - This is the configuration class to store the configuration of a [`LoraModel`]. + This is the configuration class that extends peft.LoraConfig with a few defaults. Args: - r (`int`): - Lora attention dimension (the "rank"). - target_modules (List[str]]): - The names of the modules to apply the adapter to. \ - If this is specified, only the modules with the specified \ - names will be replaced. Please specify modules as per model architecture. \ - If the value is ["all-linear"], \ - then LORA selects all linear and Conv1D modules as per model architecture, \ - except for the output layer. lora_alpha (`int`): The alpha parameter for Lora scaling. lora_dropout (`float`): The dropout probability for Lora layers. - bias (`str`): - Bias type for LoRA. Can be 'none', 'all' or 'lora_only'. \ - If 'all' or 'lora_only', the corresponding biases will be updated during training. \ - Be aware that this means that, even when disabling the adapters, the model \ - will not produce the same output as the base model would have without adaptation. 
""" - r: int = 8 lora_alpha: int = 32 - target_modules: List[str] = field( + lora_dropout: float = 0.05 + + # HACK: The fields redeclared below + # are a fix which narrows the field annotation from + # Optional[Union[List[str], str]] type to Optional[List[str]] type. + # This is done for compatibility with HFArgumentParser. + # Please see: https://github.com/huggingface/peft/issues/2798 for further explanation! + target_modules: Optional[List[str]] = field( default=None, metadata={ - "help": "The names of the modules to apply LORA to. LORA selects modules which either \ - completely match or " - 'end with one of the strings. If the value is ["all-linear"], \ - then LORA selects all linear and Conv1D ' - "modules except for the output layer." + "help": ( + "List of module names or regex expression of the module names to replace with LoRA." + "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. " + "This can also be a wildcard 'all-linear' which matches all linear/Conv1D " + "(if the model is a PreTrainedModel, the output layer excluded). " + "If not specified, modules will be chosen according to the model architecture, " + "If the architecture is not known, an error will be raised -- " + "in this case, you should specify the target modules manually. " + "To avoid targeting any modules (because you want to apply `target_parameters`) " + ", set `target_modules=[]`." + ), }, ) - target_parameters: List[str] = field( + exclude_modules: Optional[List[str]] = field( default=None, - metadata={"help": "The names/regex of the parameters to apply LORA to"}, + metadata={ + "help": ( + "List of module names or regex expression of the module names to exclude from Lora." + ) + }, ) - bias = "none" - lora_dropout: float = 0.05 + init_lora_weights: bool = field( + default=True, + metadata={ + "help": ( + "How to initialize the weights of the LoRA layers. 
" + "Passing True (default) results in the default initialization from " + "the reference implementation from " + "Microsoft, with the LoRA B weight being set to 0. " + "This means that without further training, " + "the LoRA adapter will be a no-op. " + "Setting the initialization to False leads to random initialization of " + "LoRA A and B, meaning that LoRA is not a no-op before training; " + "this setting is intended for debugging purposes." + ), + }, + ) + layers_to_transform: Optional[list[int]] = field( + default=None, + metadata={ + "help": ( + "The layer indexes to transform, is this argument is specified, " + "PEFT will transform only the layers indexes that are specified inside this list. " + "If a single integer is passed, PEFT will transform only the layer at this index. " + "This only works when target_modules is a list of str." + ) + }, + ) + layers_pattern: Optional[list[str]] = field( + default=None, + metadata={ + "help": ( + "The layer pattern name, used only if `layers_to_transform` is different to None " + "and if the layer pattern is not in the common layers pattern. " + "This only works when target_modules is a list of str. " + "This should target the `nn.ModuleList` of the " + "model, which is often called `'layers'` or `'h'`." + ) + }, + ) + trainable_token_indices: Optional[list[int]] = field( + default=None, + metadata={ + "help": ( + "Lets you specify which token indices to selectively fine-tune " + "without requiring to re-train the " + "whole embedding matrix using the `peft.TrainableTokensModel` method. " + "You can specify token indices in two ways. " + "Either you specify a list of indices which will then target the model's input " + "embedding layer (or, if not found, `embed_tokens`). " + "(Not supported yet) Alternatively, you can specify a dictionary " + "where the key is the name of the embedding module " + "and the values are the list of token indices, e.g. " + "`{'embed_tokens': [0, 1, ...]}`. 
Note that training " + "with FSDP requires `use_orig_params=True` to " + "avoid issues with non-uniform `requires_grad`." + ) + }, + ) + loftq_config: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The configuration of LoftQ. If this is passed, " + "then LoftQ will be used to quantize the backbone " + "weights and initialize Lora layers. Also set `init_lora_weights='loftq'` " + "in this case." + ) + }, + ) + + def __post_init__(self): + # Unwrap the ["all-linear"] list form into the plain string "all-linear" + if self.target_modules == ["all-linear"]: + self.target_modules = "all-linear" + + super().__post_init__() @dataclass diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 8fafc12da3..051db7f814 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -71,7 +71,7 @@ def train( data_args: configs.DataArguments, train_args: configs.TrainingArguments, peft_config: Optional[ # pylint: disable=redefined-outer-name - Union[peft_config.LoraConfig, LoraConfig, peft_config.PromptTuningConfig] + Union[LoraConfig, peft_config.PromptTuningConfig] ] = None, quantization_config: Optional[peft_config.Mxfp4Config] = None, trainer_controller_args: TrainerControllerCallback = None, @@ -92,8 +92,7 @@ def train( model_args: tuning.config.configs.ModelArguments data_args: tuning.config.configs.DataArguments train_args: tuning.config.configs.TrainingArguments - peft_config: peft_config.LoraConfig for Lora tuning | \ - LoraConfig (peft.LoraConfig): for activated Lora (aLoRA) tuning | \ + peft_config: LoraConfig (peft.LoraConfig): for Lora and activated Lora (aLoRA) tuning | \ peft_config.PromptTuningConfig for prompt tuning | \ None for full fine tuning The peft configuration to pass to trainer @@ -110,7 +109,8 @@ def train( tracker with automatically be added. exp_metadata: Dict of key value pairs passed to train to be recoreded by the tracker. 
quantized_lora_config: tuning.config.acceleration_configs.QuantizedLoraConfig \ - Should be used in combination with peft_config.LoraConfig for Lora tuning \ + Should be used in combination with LoraConfig for Lora tuning \ + https://huggingface.co/docs/peft/en/package_reference/lora#peft.LoraConfig \ fusedops_kernels_config: tuning.config.acceleration_configs.FusedOpsAndKernelsConfig \ Should be used in combination with quantized_lora_config. Also currently fused_lora and fast_kernels must used together (may change in future). \ @@ -845,9 +845,7 @@ def main(): ) sys.exit(INTERNAL_ERROR_EXIT_CODE) - if isinstance( - tune_config, (peft_config.LoraConfig, LoraConfig) - ): # aLoraConfig subclasses LoraConfig + if isinstance(tune_config, LoraConfig): # aLoraConfig subclasses LoraConfig try: if training_args.save_model_dir: # Write number of added tokens to artifacts diff --git a/tuning/utils/config_utils.py b/tuning/utils/config_utils.py index 78c4d32ab2..45fda027fe 100644 --- a/tuning/utils/config_utils.py +++ b/tuning/utils/config_utils.py @@ -20,7 +20,6 @@ import pickle # Third Party -from peft import LoraConfig as HFLoraConfig from peft import PromptTuningConfig as HFPromptTuningConfig # Local @@ -112,10 +111,13 @@ def get_hf_peft_config(task_type, tuning_config, tokenizer_name_or_path): alora_config.task_type = task_type hf_peft_config = alora_config elif isinstance(tuning_config, peft_config.LoraConfig): - lora_config = asdict(tuning_config) - if lora_config["target_modules"] == ["all-linear"]: - lora_config["target_modules"] = "all-linear" - hf_peft_config = HFLoraConfig(task_type=task_type, **lora_config) + if getattr(tuning_config, "target_modules") == ["all-linear"]: + setattr(tuning_config, "target_modules", "all-linear") + + if getattr(tuning_config, "task_type") is None: + setattr(tuning_config, "task_type", task_type) + + hf_peft_config = tuning_config elif isinstance(tuning_config, peft_config.PromptTuningConfig): hf_peft_config = HFPromptTuningConfig( 
task_type=task_type,