@@ -210,26 +210,29 @@ move_profile_logs() {
# For checkpoint-related args, automatically isolate by model and run mode
# (resume/no_resume) to avoid cross-test overwrites in one-click runs.
#
# Renders the args object of one test case from the JSON config as a single
# line of "--key value" pairs.
#
# Path rewriting applied before rendering:
#   - checkpoint_dir gets "<model>/<run_mode>" added, where run_mode is
#     "resume" when the test has a resume_from arg and "no_resume" otherwise.
#   - resume_from gets "<model>/<source_mode>" added, where source_mode is
#     "no_resume" when the configured resume_from value contains the text
#     "no_resume" and "resume" otherwise.
#   - If the path contains a "checkpoint_step_<N>" component, the two
#     segments are inserted right before it; otherwise they are appended.
#
# Globals:
#   CONFIG_FILE (read) - path of the JSON config handed to jq
# Arguments:
#   $1 - index of the group inside .test_groups
#   $2 - index of the test inside that group's .tests
#   $3 - model name used as a path segment
#   $4 - test id; accepted so existing call sites keep working, not used
# Outputs:
#   One line on stdout with the space-separated "--key value" pairs. A test
#   that has no args object produces an empty line instead of a jq error.
args_string_for_test() {
    local group_idx="$1"
    local test_idx="$2"
    local model_name="$3"

    # NOTE(review): the "no_resume" check below is a plain substring match on
    # the configured resume_from value, so any path that merely contains that
    # text (e.g. a directory called "my_no_resume_runs") selects the no_resume
    # source. Confirm this is intended rather than a path-segment match.
    jq -r --argjson g "$group_idx" --argjson t "$test_idx" --arg model "$model_name" '
        def namespaced_path($p; $model; $mode):
            if ($p | test("/checkpoint_step_[0-9]+($|/)")) then
                ($p | capture("^(?<prefix>.*)/(?<step>checkpoint_step_[0-9]+(?:/.*)?)$")) as $m
                | ($m.prefix + "/" + $model + "/" + $mode + "/" + $m.step)
            else
                ($p + "/" + $model + "/" + $mode)
            end;

        (.test_groups[$g].tests[$t].args // {}) as $args
        | (if ($args | has("resume_from")) then "resume" else "no_resume" end) as $run_mode
        | (if (($args.resume_from // "") | test("no_resume")) then "no_resume" else "resume" end) as $resume_src_mode
        | $args
        | (if has("checkpoint_dir") then .checkpoint_dir = namespaced_path(.checkpoint_dir; $model; $run_mode) else . end)
        | (if has("resume_from") then .resume_from = namespaced_path(.resume_from; $model; $resume_src_mode) else . end)
        | to_entries[]
        | "--\(.key) \(.value|tostring)"
    ' "$CONFIG_FILE" | paste -sd' ' -
}
234237
235238# Run tests
@@ -268,18 +271,28 @@ for ((id=0; id<num_builds; ++id)); do
268271 log_suffix=" _profile"
269272 fi
270273
271- for (( ti= 0 ; ti< num_tests; ++ ti)) ; do
272- test_id=$( jq -r " .tests[$ti ].id" " $CONFIG_FILE " )
273- gpt2_arg_str=" $( args_string_for_test " $ti " " gpt2" ) "
274- llama3_arg_str=" $( args_string_for_test " $ti " " llama3" ) "
274+ for (( gi= 0 ; gi< num_groups; ++ gi)) ; do
275+ group_tag=$( jq -r " .test_groups[$gi ].tag" " $CONFIG_FILE " )
276+ if [[ ${# SELECTED_TAGS[@]} -gt 0 && -z " ${SELECTED_TAGS[$group_tag]} " ]]; then
277+ continue
278+ fi
279+
280+ num_tests=$( jq " .test_groups[$gi ].tests | length" " $CONFIG_FILE " )
281+ echo -e " \033[1;36m[TEST GROUP] tag=${group_tag} , cases=${num_tests} \033[0m"
282+
283+ for (( ti= 0 ; ti< num_tests; ++ ti)) ; do
284+ test_id=$( jq -r " .test_groups[$gi ].tests[$ti ].id" " $CONFIG_FILE " )
285+ gpt2_arg_str=" $( args_string_for_test " $gi " " $ti " " gpt2" " $test_id " ) "
286+ llama3_arg_str=" $( args_string_for_test " $gi " " $ti " " llama3" " $test_id " ) "
275287
276- # gpt2
277- gpt2_cmd=" ${prefix} ./gpt2 --input_bin ${GPT2_INPUT_BIN} --llmc_filepath ${GPT2_LLMC_FILEPATH} --device cuda ${gpt2_arg_str} "
278- run_and_log " $gpt2_cmd " " gpt2_${test_id}${log_suffix} " " $profile_flag "
288+ # gpt2
289+ gpt2_cmd=" ${prefix} ./gpt2 --input_bin ${GPT2_INPUT_BIN} --llmc_filepath ${GPT2_LLMC_FILEPATH} --device cuda ${gpt2_arg_str} "
290+ run_and_log " $gpt2_cmd " " gpt2_${test_id}${log_suffix} " " $profile_flag " " $group_tag "
279291
280- # llama3
281- llama3_cmd=" ${prefix} ./llama3 --input_bin ${LLAMA3_INPUT_BIN} --llmc_filepath ${LLAMA3_LLMC_FILEPATH} --device cuda ${llama3_arg_str} "
282- run_and_log " $llama3_cmd " " llama3_${test_id}${log_suffix} " " $profile_flag "
292+ # llama3
293+ llama3_cmd=" ${prefix} ./llama3 --input_bin ${LLAMA3_INPUT_BIN} --llmc_filepath ${LLAMA3_LLMC_FILEPATH} --device cuda ${llama3_arg_str} "
294+ run_and_log " $llama3_cmd " " llama3_${test_id}${log_suffix} " " $profile_flag " " $group_tag "
295+ done
283296 done
284297done
285298
0 commit comments