Skip to content

Commit 86a5e5b

Browse files
committed
End on correct epoch
1 parent f685a2b commit 86a5e5b

3 files changed

Lines changed: 15 additions & 15 deletions

File tree

task_config_template.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ num_gpus = 4
121121
patterns = ['time = (\d+\.\d+|\d+)', 'wps = (\d+\.\d+|\d+)', 'train loss = (\d+\.\d+|\d+)', 'ppl = (\d+\.\d+|\d+)', 'log_perplexity = (\d+\.\d+|\d+)', 'perplexity = (\d+\.\d+|\d+)']
122122
metrics = ['total_training_time', 'words_per_second', 'train_loss', 'train_perplexity', 'test_loss', 'test_perplexity']
123123
compute_method = ['total', 'average', 'last', 'last', 'last', 'last']
124-
command_to_execute = python tensorflow_benchmark/tf_word_language_model/single_lm_train.py --gpus=4 --epochs=5 --datadir=tensorflow_benchmark/tf_word_language_model/1-billion-word-language-modeling-benchmark-r13output/ --hpconfig run_profiler=False,float16_rnn=False,num_steps=20,num_shards=8,num_layers=1,learning_rate=0.2,max_grad_norm=1,keep_prob=0.9,emb_size=1024,projected_size=512,state_size=2048,num_sampled=8192,batch_size=256
124+
command_to_execute = for i in {1..5}; do python tensorflow_benchmark/tf_word_language_model/single_lm_train.py --gpus=4 --epochs=1 --datadir=tensorflow_benchmark/tf_word_language_model/1-billion-word-language-modeling-benchmark-r13output/ --hpconfig run_profiler=False,float16_rnn=False,num_steps=20,num_shards=8,num_layers=1,learning_rate=0.2,max_grad_norm=1,keep_prob=0.9,emb_size=1024,projected_size=512,state_size=2048,num_sampled=8192,batch_size=256; python tensorflow_benchmark/tf_word_language_model/single_lm_train.py --logdir=/home/ubuntu/deep-learning-benchmark-mirror/lm1b --num_gpus=1 --mode=eval_full --datadir=tensorflow_benchmark/tf_word_language_model/1-billion-word-language-modeling-benchmark-r13output/ --hpconfig run_profiler=False,float16_rnn=False,num_steps=20,num_shards=8,num_layers=1,learning_rate=0.2,max_grad_norm=1,keep_prob=0.9,emb_size=1024,projected_size=512,state_size=2048,num_sampled=8192,batch_size=1; done
125125
num_gpus = 4
126126

127127
[resnet50_imagenet_symbolic_fp16_batch_size32_p3_16]

tensorflow_benchmark/tf_word_language_model/run_utils.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -68,19 +68,19 @@ def run_train(dataset, hps, logdir, ps_device, eval_dataset, task=0, master=""):
6868

6969
if epoch_done:
7070
cur_epoch = int(x)
71-
eval_data_iterator = eval_dataset.iterate_once(hps.batch_size * hps.num_gpus, hps.num_steps)
72-
loss_nom = 0.0
73-
loss_den = 0.0
74-
75-
for i, (x, y) in enumerate(eval_data_iterator):
76-
loss = sess.run(model.loss, {model.x: x, model.y: y})
77-
loss_nom += loss
78-
loss_den += 1
79-
loss = loss_nom / loss_den
80-
81-
log_perplexity = loss_nom / loss_den
82-
print("Results after epoch %d: log_perplexity = %.3f perplexity = %.3f" % (
83-
cur_epoch, log_perplexity, np.exp(log_perplexity)))
71+
# eval_data_iterator = eval_dataset.iterate_once(hps.batch_size * hps.num_gpus, hps.num_steps)
72+
# loss_nom = 0.0
73+
# loss_den = 0.0
74+
#
75+
# for i, (x, y) in enumerate(eval_data_iterator):
76+
# loss = sess.run(model.loss, {model.x: x, model.y: y})
77+
# loss_nom += loss
78+
# loss_den += 1
79+
# loss = loss_nom / loss_den
80+
#
81+
# log_perplexity = loss_nom / loss_den
82+
# print("Results after epoch %d: log_perplexity = %.3f perplexity = %.3f" % (
83+
# cur_epoch, log_perplexity, np.exp(log_perplexity)))
8484

8585
x, y = next(data_iterator)
8686

utils/cfg_process.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def generate_cfg(cfg_template, cfg_path, **infra_spec):
5151
elif "num_gpus" in infra_spec:
5252
cmd = re.sub("--gpus=\d+", "", cmd)
5353
if "epochs" in infra_spec and infra_spec["epochs"] is not None and infra_spec["epochs"] > 0:
54-
cmd = re.sub("--epochs=\d+", "--epochs=%d" % infra_spec["epochs"], cmd)
54+
cmd = re.sub("{1..\d+}", "{1..%d}" % infra_spec["epochs"], cmd)
5555
else:
5656
if "num_gpus" in infra_spec and infra_spec["num_gpus"] is not None and infra_spec["num_gpus"] > 0:
5757
cmd = re.sub("--gpus \d+", "--gpus %d" % infra_spec["num_gpus"], cmd)

0 commit comments

Comments
 (0)