|
| 1 | +# FMS Acceleration Plugin Configuration. |
| 2 | +# |
| 3 | +# Each stanza incorporates various configurations for |
| 4 | +# different fine-tuning / training tasks. |
| 5 | +plugins: |
| 6 | + # Configurations to accelerate data packing/padding in training |
| 7 | + training: |
| 8 | + |
| 9 | + # attention module configurations |
| 10 | + # e.g. padding-free modifications to attention layer |
| 11 | + attention: |
| 12 | + |
| 13 | + # this controls the configurations for padding-free computation of flash attention |
| 14 | + padding_free: |
| 15 | + method: huggingface |
| 16 | + fused_ops_and_kernels: |
| 17 | + |
| 18 | + # If placed under the training stanza, putting |
| 19 | + # base_layer and fused_lora here would be a misnomer |
| 20 | + # - those settings belong in peft.quantized. |
| 21 | + # However, if they are specified here, they will still |
| 22 | + # be read. This is useful in cases where |
| 23 | + # the YAML is system-generated and not shown |
| 24 | + # to a user. |
| 25 | + |
| 26 | + # activate various unsloth optimizations |
| 27 | + # there are two versions of the plugin |
| 28 | + # - the FastKernel version supports individual kernels |
| 29 | + # - the FastQuantized version is all-or-nothing |
| 30 | + |
| 31 | + # fast loss triton kernels |
| 32 | + fast_loss: true |
| 33 | + |
| 34 | + # fast rms norm triton kernels |
| 35 | + fast_rms_layernorm: true |
| 36 | + |
| 37 | + # fast RoPE embedding triton kernels |
| 38 | + fast_rope_embeddings: true |
| 39 | + moe: |
| 40 | + |
| 41 | + # expert-parallel for MoE |
| 42 | + scattermoe: |
| 43 | + |
| 44 | + # The level of expert parallel sharding. |
| 45 | + # - 1 means no sharding |
| 46 | + # - if > 1, please ensure that it divides the world_size. This is because |
| 47 | + # the devices are split into replica groups of ep_degree devices each, and |
| 48 | + # the experts will be sharded within each group. |
| 49 | + # - if > 1, also ensure that it divides the number of experts, as each device |
| 50 | + # will then have num_of_experts / ep_degree experts. |
| 51 | + ep_degree: 8 |
0 commit comments