|
| 1 | +defaults: |
| 2 | + - ../llama-3_1-8B_pruneffn_memory/pruning/ffn_pruning@pruning |
| 3 | + - ../llama-3_1-8B_pruneffn_memory/validate_solutions_defaults@scoring |
| 4 | + - ../llama-3_1-8B_pruneffn_memory/validate_solutions_defaults@realize_model |
| 5 | + - bypass: |
| 6 | + - override hydra/hydra_logging: disabled |
| 7 | + - _self_ |
| 8 | + |
| 9 | +puzzle_dir: ??? |
| 10 | +descriptor: llama |
| 11 | +teacher_dir: ${puzzle_dir}/ckpts/teacher/ |
| 12 | +replacement_library_path: ${puzzle_dir}/replacement_library.json |
| 13 | +dataset_path: ??? # path to Nemotron-Post-Training-Dataset-v2 |
| 14 | + |
| 15 | +skip_realize_model: false |
| 16 | + |
| 17 | +build_replacement_library: |
| 18 | + add_ffn_no_ops: true |
| 19 | + add_attention_no_ops: true |
| 20 | + |
| 21 | +calc_subblock_stats: |
| 22 | + batch_sizes: [1, 4] |
| 23 | + prefill_seq_len: 1024 |
| 24 | + generation_seq_len: 1024 |
| 25 | + num_active_tokens_override: # Optional override for sequence lengths |
| 26 | + prefill_queue_size: 0 |
| 27 | + allocate_prefill_query: false |
| 28 | + merge_with_existing_stats: false |
| 29 | + subblock_stats_filename: "subblock_stats.json" |
| 30 | + moe_stats_filename: "moe_stats.json" |
| 31 | + |
| 32 | +scoring: |
| 33 | + descriptor: ${descriptor} |
| 34 | + solutions_to_validate: |
| 35 | + skip_existing_solutions: true |
| 36 | + |
| 37 | + replacement_library_path: ${replacement_library_path} |
| 38 | + solutions_path: ${to_path:${puzzle_dir}/single_sequence_replacement_solutions.json} |
| 39 | + teacher_dir: ${to_path:${teacher_dir}} |
| 40 | + output_dir: ${puzzle_dir}/single_sequence_replacement_solutions--validation |
| 41 | + |
| 42 | + eval_samples: 128 |
| 43 | + micro_batch_size: 1 |
| 44 | + seed: 42 |
| 45 | + shuffle_seed: 444 |
| 46 | + dataset_path: ${dataset_path} |
| 47 | + |
| 48 | +mip: |
| 49 | + single_block_replacement_validation_dir: ${to_path:${scoring.output_dir}} |
| 50 | + subblock_stats_path: ${to_path:${puzzle_dir}/${calc_subblock_stats.subblock_stats_filename}} |
| 51 | + output_path: ${to_path:${puzzle_dir}/mip/puzzle_solutions} |
| 52 | + gathered_metrics_path: |
| 53 | + puzzle_profile: |
| 54 | + |
| 55 | + # puzzle_profile: |
| 56 | + objective: metrics.cosine_embedding_loss_hidden_states |
| 57 | + bigger_is_better: false |
| 58 | + |
| 59 | + subblock_stats_args: |
| 60 | + - batch_size: 1 |
| 61 | + weights_dtype: torch.bfloat16 |
| 62 | + |
| 63 | + report_additional_costs: |
| 64 | + - stats.memory_mib |
| 65 | + - stats.num_params |
| 66 | + - stats.num_kv_heads |
| 67 | + - stats.has_attention |
| 68 | + - stats.has_ffn |
| 69 | + - stats.kv_cache_memory_mib |
| 70 | + - stats.attention_memory_mib |
| 71 | + - stats.ffn_memory_mib |
| 72 | + - stats.ffn_num_params |
| 73 | + - stats.attention_num_params |
| 74 | + |
| 75 | + human_constraints: |
| 76 | + target_latency_seconds: 5 |
| 77 | + |
| 78 | + mip_constraints: |
| 79 | + metric_overrides: |
| 80 | + max_seconds_per_solution: 60 |
| 81 | + |
| 82 | +realize_model: |
| 83 | + descriptor: ${descriptor} |
| 84 | + teacher_dir: ${to_path:${teacher_dir}} |
| 85 | + tokenizer_name: ${to_path:${teacher_dir}} |
| 86 | + replacement_library_path: ${replacement_library_path} |
| 87 | + save_models: true |
| 88 | + solutions_path: # Filled dynamically |
| 89 | + |
| 90 | + # Validate params |
| 91 | + skip_validation: false # Keep `skip_validation` set to false to enable validation of the model solution |
| 92 | + eval_samples: 128 |
| 93 | + micro_batch_size: 1 |
| 94 | + seed: 42 |
| 95 | + shuffle_seed: 444 |
| 96 | + dataset_path: ${dataset_path} |
| 97 | + |
| 98 | +nccl_timeout_minutes: ${timedelta_minutes:120} |
| 99 | + |
| 100 | +# This section redirects Hydra outputs |
| 101 | +hydra: |
| 102 | + run: |
| 103 | + dir: ${puzzle_dir}/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} |
0 commit comments