Skip to content

Commit 8a9ea72

Browse files
authored
Iss129 hs (#132)
## Description <!-- Brief description of the changes --> ## Type of Change - [ ] Bug fix - [ ] New feature - [ ] Breaking change - [ ] Documentation ## Testing - [ ] Tests pass locally - [ ] New tests added (if applicable) ## Related Issues Closes #
2 parents 10faf5d + f0c6ae5 commit 8a9ea72

284 files changed

Lines changed: 84271 additions & 123 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

configs/prune_llm/README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,19 @@ Configurations for generating results in the SCAR LLM pruning paper.
1313

1414
## Quick Start
1515

16-
Run all experiments:
16+
Run single model:
1717
```bash
18-
bash slurm_jobs/prune_llm/run_all_paper.sh
18+
python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_unified.yaml
1919
```
2020

21-
Run single model:
21+
Paper batch launchers now live under:
2222
```bash
23-
python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_unified.yaml
23+
drafts/LLM_prune/paper/slurm_jobs/
24+
```
25+
26+
See:
27+
```bash
28+
drafts/LLM_prune/paper/slurm_jobs/README.md
2429
```
2530

2631
Override base output directory:
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# ============================================================================
2+
# LLAMA-3.1-70B PAPER-SAFE BENCHMARK RERUN: 50% FFN PRUNING + 8B-MATCHED TASKS
3+
# ============================================================================
4+
#
5+
# Purpose:
6+
# - Upgrade the preliminary 70B benchmark table from 100 samples/task to a more
7+
# defensible evaluation budget (see llm.evaluation_num_samples: 500) for main-paper use.
8+
# - Match the 8B benchmark suite more closely by including ARC-E and OBQA.
9+
# - Keep the method set focused on the main comparisons that matter in the paper.
10+
#
11+
# Recommended use:
12+
# - Promote 70B task results into the main paper only after this rerun finishes.
13+
# ============================================================================
14+
15+
experiment:
16+
name: "llama3_70b_scale_benchmarks_50_papersafe"
17+
type: "llm_alignment"
18+
output_dir: "./results/paper/llama3_70b_scale_benchmarks_50_papersafe"
19+
seed: 42
20+
device: "cuda"
21+
save_activations: false
22+
num_networks: 1
23+
24+
model:
25+
name: "hf_causal_lm"
26+
model_id: "meta-llama/Llama-3.1-70B"
27+
dtype: "bfloat16"
28+
device_map: "auto"
29+
trust_remote_code: true
30+
tracked_layers:
31+
- "model.model.layers.*.mlp.up_proj"
32+
- "model.model.layers.*.mlp.gate_proj"
33+
- "model.model.layers.*.mlp.down_proj"
34+
35+
dataset:
36+
name: "wikitext"
37+
batch_size: 1
38+
num_workers: 0
39+
40+
calibration:
41+
dataset: "wikitext"
42+
subset: "wikitext-2-raw-v1"
43+
split: "train"
44+
num_samples: 64
45+
max_length: 512
46+
batch_size: 1
47+
48+
metrics:
49+
enabled:
50+
- "activation_l2_norm"
51+
num_samples: 64
52+
53+
do_scar_metrics: true
54+
scar_num_samples: 64
55+
scar_max_length: 512
56+
57+
llm:
58+
scar_metrics: true
59+
scar_num_samples: 64
60+
scar_max_length: 512
61+
evaluate_perplexity: true
62+
evaluation_num_samples: 500
63+
use_nvidia_fewshot: true
64+
perplexity_protocol: "oats"
65+
wikitext_subset: "wikitext-2-raw-v1"
66+
perplexity_seq_len: 2048
67+
68+
evaluation_metrics:
69+
- "perplexity"
70+
- "accuracy_mmlu"
71+
- "accuracy_hellaswag"
72+
- "accuracy_piqa"
73+
- "accuracy_boolq"
74+
- "accuracy_winogrande"
75+
- "accuracy_arc_easy"
76+
- "accuracy_arc_challenge"
77+
- "accuracy_openbookqa"
78+
79+
analysis:
80+
generate_plots: false
81+
save_scores: true
82+
83+
generate_plots: false
84+
save_scores: true
85+
86+
do_connectivity_pruning: true
87+
do_directed_redundancy: false
88+
do_halo_analysis: false
89+
do_generalized_importance: false
90+
91+
supernode:
92+
enabled: true
93+
score_metric: "scar_loss_proxy"
94+
core_fraction: 0.01
95+
follower_fraction: 0.10
96+
halo_fraction: 0.10
97+
connectivity_topk: 256
98+
connectivity_rank_normalize: false
99+
connectivity_power: 1.0
100+
protect_core: true
101+
protect_core_metrics:
102+
- "scar_loss_proxy"
103+
- "supernode_protection_score"
104+
- "supernode_connectivity_score"
105+
cross_layer_analysis: false
106+
compare_by_connection: true
107+
compute_metrics:
108+
- "activation"
109+
110+
supernode_robustness:
111+
enabled: false
112+
113+
supernode_summary:
114+
enabled: false
115+
outlier_analysis: false
116+
117+
halo_analysis:
118+
enabled: false
119+
120+
cross_layer:
121+
enabled: false
122+
123+
generalized_importance:
124+
enabled: false
125+
126+
pruning:
127+
enabled: true
128+
target: "ffn"
129+
structured: true
130+
dependency_aware: true
131+
distribution: "uniform"
132+
min_per_layer: 0.0
133+
max_per_layer: 0.95
134+
135+
sparsity_levels: [0.5]
136+
selection_modes: ["low"]
137+
algorithms:
138+
- "scar_loss_proxy"
139+
- "supernode_protection_score"
140+
- "supernode_connectivity_score"
141+
- "activation_l2_norm"
142+
- "weight_magnitude"
143+
- "wanda"
144+
145+
evaluation:
146+
enabled: true
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# ============================================================================
2+
# LLAMA-3.1-70B SCALE CHECK: MECHANISM / CONCENTRATION ONLY
3+
# ============================================================================
4+
#
5+
# Purpose:
6+
# - Test whether the main supernode concentration phenomenon persists at 70B.
7+
# - Keep the run limited to the reviewer-relevant scale question:
8+
# - LP concentration / supernode identification
9+
# - optional outlier summary (currently enabled via supernode_summary.outlier_analysis)
10+
# - Skip pruning sweeps, downstream tasks, halo controls, and true ablation probes.
11+
#
12+
# This is designed as a rebuttal-focused replication, not a full paper rerun.
13+
# ============================================================================
14+
15+
experiment:
16+
name: "llama3_70b_scale_mechanism"
17+
type: "llm_alignment"
18+
output_dir: "./results/paper/llama3_70b_scale_mechanism"
19+
seed: 42
20+
device: "cuda"
21+
save_activations: false
22+
num_networks: 1
23+
24+
model:
25+
name: "hf_causal_lm"
26+
model_id: "meta-llama/Llama-3.1-70B"
27+
dtype: "bfloat16"
28+
device_map: "auto"
29+
trust_remote_code: true
30+
tracked_layers:
31+
- "model.model.layers.*.mlp.up_proj"
32+
- "model.model.layers.*.mlp.gate_proj"
33+
- "model.model.layers.*.mlp.down_proj"
34+
35+
dataset:
36+
name: "wikitext"
37+
batch_size: 1
38+
num_workers: 0
39+
40+
calibration:
41+
dataset: "wikitext"
42+
subset: "wikitext-2-raw-v1"
43+
split: "train"
44+
num_samples: 64
45+
max_length: 512
46+
batch_size: 1
47+
48+
metrics:
49+
enabled:
50+
- "rayleigh_quotient"
51+
num_samples: 64
52+
rayleigh_quotient:
53+
relative: true
54+
regularization: 1.0e-6
55+
56+
do_scar_metrics: true
57+
scar_num_samples: 64
58+
scar_max_length: 512
59+
60+
llm:
61+
scar_metrics: true
62+
scar_num_samples: 64
63+
scar_max_length: 512
64+
evaluate_perplexity: false
65+
evaluation_metrics: []
66+
wikitext_subset: "wikitext-2-raw-v1"
67+
68+
analysis:
69+
generate_plots: false
70+
save_scores: true
71+
72+
generate_plots: false
73+
save_scores: true
74+
75+
do_connectivity_pruning: false
76+
do_directed_redundancy: false
77+
do_halo_analysis: false
78+
do_generalized_importance: false
79+
80+
supernode:
81+
enabled: true
82+
score_metric: "scar_loss_proxy"
83+
core_fraction: 0.01
84+
follower_fraction: 0.10
85+
halo_fraction: 0.10
86+
connectivity_topk: 256
87+
connectivity_rank_normalize: false
88+
connectivity_power: 1.0
89+
protect_core: true
90+
protect_core_metrics:
91+
- "scar_loss_proxy"
92+
cross_layer_analysis: false
93+
compare_by_connection: false
94+
compute_metrics:
95+
- "activation"
96+
97+
read_halo_analysis:
98+
enabled: false
99+
100+
conditional_halo_ablation:
101+
enabled: false
102+
103+
lp_ablation_validation:
104+
enabled: false
105+
106+
supernode_robustness:
107+
enabled: false
108+
109+
supernode_summary:
110+
enabled: true
111+
outlier_analysis: true
112+
113+
halo_analysis:
114+
enabled: false
115+
116+
cross_layer:
117+
enabled: false
118+
119+
generalized_importance:
120+
enabled: false
121+
122+
pruning:
123+
enabled: false
124+
125+
evaluation:
126+
enabled: false

0 commit comments

Comments
 (0)