@@ -12,6 +12,9 @@ def _run() -> None:
1212 args = utils .parse_args ()
1313 utils .update_config (args )
1414 job_name = f"{ CFG .experiment_id } _{ CFG .seed } "
15+ os .makedirs (CFG .log_dir , exist_ok = True )
16+ logfile_pattern = os .path .join (CFG .log_dir ,
17+ f"{ utils .get_config_path_str ()} __%j.log" )
1518 argsstr = " " .join (sys .argv [1 :])
1619 mystr = f"#!/bin/bash\n python src/main.py { argsstr } "
1720 temp_run_file = "temp_run_file.sh"
@@ -20,14 +23,15 @@ def _run() -> None:
2023 f .write (mystr )
2124 cmd = ("sbatch -p normal --time=99:00:00 --partition=xeon-p8 "
2225 f"--nodes=1 --exclusive --job-name={ job_name } "
23- f"-o /tmp/%j_log.out { temp_run_file } " )
26+ f"-o { logfile_pattern } { temp_run_file } " )
2427 print (f"Running command: { cmd } " )
2528 output = subprocess .getoutput (cmd )
2629 if "command not found" in output :
2730 os .remove (temp_run_file )
2831 raise Exception ("Are you logged into supercloud?" )
2932 os .remove (temp_run_file )
30- logfile = os .path .join (CFG .log_dir , f"{ utils .get_config_path_str ()} .log" )
33+ job_id = output .split ()[- 1 ]
34+ logfile = logfile_pattern .replace ("%j" , str (job_id ))
3135 print (f"Started job, see log with:\n tail -n 10000 -F { logfile } " )
3236
3337
0 commit comments