I tried to run this code on the SST-2 dataset, but running `masked_blockwise_run_glue.py` fails with the error below. Could you please explain what causes it?
Thanks in advance.
`
mkdir -p ${OUTPUT_PATH_sst_2}
export CUDA_VISIBLE_DEVICES=0; python masked_blockwise_run_glue.py --output_dir ${OUTPUT_PATH_sst_2} --data_dir ${DATA_DIR_SST_2}
--task_name sst-2 --do_train --do_eval --do_lower_case --model_type masked_bert --local_rank -1
--model_name_or_path ${teacher_path_sst_2_partial} --per_gpu_train_batch_size 32 --overwrite_output_dir
--warmup_steps 11000 --num_train_epochs 20 --max_seq_length 128 --block_rows ${block_rows} --block_cols ${block_cols}
--learning_rate 3e-05 --mask_scores_learning_rate 1e-2 --evaluate_during_training
--logging_steps 3500 --save_steps 3500 --teacher_type masked_bert --teacher_name_or_path ${teacher_path_sst_2_partial}
--fp16 --final_threshold ${threshold} --final_lambda 20000 --pruning_method topK
--mask_init constant --mask_scale 0. | tee -a ${OUTPUT_PATH_sst_2}/training_log.txt
File "masked_blockwise_run_glue.py", line 943, in <module>
main()
File "masked_blockwise_run_glue.py", line 918, in main
global_step, tr_loss = train(
File "masked_blockwise_run_glue.py", line 210, in train
outputs = model(**inputs)
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 854, in forward
outputs = self.bert(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 780, in forward
encoder_outputs = self.encoder(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 484, in forward
layer_outputs = layer_module(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 435, in forward
self_attention_outputs = self.attention(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 341, in forward
self_outputs = self.self(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 227, in forward
mixed_query_layer = self.query(hidden_states)
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modules/masked_nn.py", line 252, in forward
output = self.block_pruning_forward(input)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modules/masked_nn.py", line 258, in block_pruning_forward
tmp_weight = blockshaped(self.weight, self.block_rows, self.block_cols)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modules/masked_nn.py", line 27, in blockshaped
return (arr.reshape(h // nrows, nrows, -1, ncols)
RuntimeError: cannot reshape tensor of 0 elements into shape [0, 16, -1, 16] because the unspecified dimension size -1 can be any value and is ambiguous
`
I tried to run this code on the SST-2 dataset, but running `masked_blockwise_run_glue.py` fails with the error below. Could you please explain what causes it?
Thanks in advance.
`
mkdir -p ${OUTPUT_PATH_sst_2}
export CUDA_VISIBLE_DEVICES=0; python masked_blockwise_run_glue.py --output_dir ${OUTPUT_PATH_sst_2} --data_dir ${DATA_DIR_SST_2}
--task_name sst-2 --do_train --do_eval --do_lower_case --model_type masked_bert --local_rank -1
--model_name_or_path ${teacher_path_sst_2_partial} --per_gpu_train_batch_size 32 --overwrite_output_dir
--warmup_steps 11000 --num_train_epochs 20 --max_seq_length 128 --block_rows ${block_rows} --block_cols ${block_cols}
--learning_rate 3e-05 --mask_scores_learning_rate 1e-2 --evaluate_during_training
--logging_steps 3500 --save_steps 3500 --teacher_type masked_bert --teacher_name_or_path ${teacher_path_sst_2_partial}
--fp16 --final_threshold ${threshold} --final_lambda 20000 --pruning_method topK
--mask_init constant --mask_scale 0. | tee -a ${OUTPUT_PATH_sst_2}/training_log.txt
File "masked_blockwise_run_glue.py", line 943, in <module>
main()
File "masked_blockwise_run_glue.py", line 918, in main
global_step, tr_loss = train(
File "masked_blockwise_run_glue.py", line 210, in train
outputs = model(**inputs)
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 854, in forward
outputs = self.bert(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 780, in forward
encoder_outputs = self.encoder(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 484, in forward
layer_outputs = layer_module(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 435, in forward
self_attention_outputs = self.attention(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 341, in forward
self_outputs = self.self(
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modeling_bert_masked.py", line 227, in forward
mixed_query_layer = self.query(hidden_states)
File "/home/flyvideo/anaconda3/envs/prune-head/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modules/masked_nn.py", line 252, in forward
output = self.block_pruning_forward(input)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modules/masked_nn.py", line 258, in block_pruning_forward
tmp_weight = blockshaped(self.weight, self.block_rows, self.block_cols)
File "/home/flyvideo/Sata/zy/MLPruning-main/training/emmental/modules/masked_nn.py", line 27, in blockshaped
return (arr.reshape(h // nrows, nrows, -1, ncols)
RuntimeError: cannot reshape tensor of 0 elements into shape [0, 16, -1, 16] because the unspecified dimension size -1 can be any value and is ambiguous
`