Skip to content

Commit 91d722e

Browse files
fix: add lr_scheduler test group in config
1 parent 315ebbb commit 91d722e

1 file changed

Lines changed: 176 additions & 0 deletions

File tree

scripts/test_config.json

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,182 @@
304304
}
305305
]
306306
},
307+
{
308+
"tag": "lr_scheduler",
309+
"tests": [
310+
{
311+
"id": "3_none_distopt",
312+
"args": {
313+
"dtype": "float32",
314+
"nthread_per_process": 8,
315+
"num_iteration": 10,
316+
"batch_size": 10,
317+
"total_batch_size": 5120,
318+
"use_distributed_optimizer": true,
319+
"learning_rate": 0.00001,
320+
"lr_decay_style": "none"
321+
}
322+
},
323+
{
324+
"id": "4_constant_tp4",
325+
"args": {
326+
"dtype": "float32",
327+
"nthread_per_process": 8,
328+
"num_iteration": 10,
329+
"batch_size": 40,
330+
"total_batch_size": 5120,
331+
"tensor_parallel": 4,
332+
"learning_rate": 0.00001,
333+
"min_lr": 0.000001,
334+
"lr_decay_style": "constant",
335+
"lr_warmup_iters": 0,
336+
"lr_decay_iters": 0
337+
}
338+
},
339+
{
340+
"id": "5_linear_tp4_sp_distopt",
341+
"args": {
342+
"dtype": "float32",
343+
"nthread_per_process": 8,
344+
"num_iteration": 10,
345+
"batch_size": 40,
346+
"total_batch_size": 5120,
347+
"tensor_parallel": 4,
348+
"sequence_parallel": true,
349+
"use_distributed_optimizer": true,
350+
"learning_rate": 0.00001,
351+
"min_lr": 0.000001,
352+
"lr_decay_style": "linear",
353+
"lr_warmup_iters": 2,
354+
"lr_warmup_init": 0.0,
355+
"lr_decay_iters": 10
356+
}
357+
},
358+
{
359+
"id": "6_cosine_pp8",
360+
"args": {
361+
"dtype": "float32",
362+
"nthread_per_process": 8,
363+
"num_iteration": 10,
364+
"batch_size": 10,
365+
"total_batch_size": 5120,
366+
"pipeline_parallel": 8,
367+
"learning_rate": 0.00001,
368+
"min_lr": 0.000001,
369+
"lr_decay_style": "cosine",
370+
"lr_warmup_iters": 2,
371+
"lr_warmup_init": 0.0,
372+
"lr_decay_iters": 10
373+
}
374+
},
375+
{
376+
"id": "7_inverse_sqrt_pp4_vpp2",
377+
"args": {
378+
"dtype": "float32",
379+
"nthread_per_process": 4,
380+
"num_iteration": 10,
381+
"batch_size": 10,
382+
"total_batch_size": 5120,
383+
"pipeline_parallel": 4,
384+
"virtual_pipeline_parallel": 2,
385+
"learning_rate": 0.00001,
386+
"min_lr": 0.000001,
387+
"lr_decay_style": "inverse-square-root",
388+
"lr_warmup_iters": 2,
389+
"lr_warmup_init": 0.0,
390+
"lr_decay_iters": 10
391+
}
392+
},
393+
{
394+
"id": "8_cosine_all_parallel_distopt",
395+
"args": {
396+
"dtype": "float32",
397+
"nthread_per_process": 8,
398+
"num_iteration": 10,
399+
"batch_size": 40,
400+
"total_batch_size": 5120,
401+
"tensor_parallel": 2,
402+
"sequence_parallel": true,
403+
"pipeline_parallel": 2,
404+
"virtual_pipeline_parallel": 2,
405+
"use_distributed_optimizer": true,
406+
"learning_rate": 0.00001,
407+
"min_lr": 0.000001,
408+
"lr_decay_style": "cosine",
409+
"lr_warmup_iters": 2,
410+
"lr_warmup_init": 0.0,
411+
"lr_decay_iters": 10
412+
}
413+
},
414+
{
415+
"id": "3_bfloat16_linear",
416+
"args": {
417+
"dtype": "bfloat16",
418+
"nthread_per_process": 8,
419+
"num_iteration": 10,
420+
"batch_size": 10,
421+
"total_batch_size": 5120,
422+
"learning_rate": 0.00001,
423+
"min_lr": 0.000001,
424+
"lr_decay_style": "linear",
425+
"lr_warmup_iters": 2,
426+
"lr_warmup_init": 0.0,
427+
"lr_decay_iters": 0
428+
}
429+
},
430+
{
431+
"id": "4_bfloat16_inverse_sqrt_tp4_distopt",
432+
"args": {
433+
"dtype": "bfloat16",
434+
"nthread_per_process": 8,
435+
"num_iteration": 10,
436+
"batch_size": 40,
437+
"total_batch_size": 5120,
438+
"tensor_parallel": 4,
439+
"use_distributed_optimizer": true,
440+
"learning_rate": 0.00001,
441+
"min_lr": 0.000001,
442+
"lr_decay_style": "inverse-square-root",
443+
"lr_warmup_iters": 2,
444+
"lr_warmup_init": 0.0,
445+
"lr_decay_iters": 10
446+
}
447+
},
448+
{
449+
"id": "5_bfloat16_constant_tp4_sp",
450+
"args": {
451+
"dtype": "bfloat16",
452+
"nthread_per_process": 8,
453+
"num_iteration": 10,
454+
"batch_size": 40,
455+
"total_batch_size": 5120,
456+
"tensor_parallel": 4,
457+
"sequence_parallel": true,
458+
"learning_rate": 0.00001,
459+
"min_lr": 0.000001,
460+
"lr_decay_style": "constant",
461+
"lr_warmup_iters": 0,
462+
"lr_decay_iters": 10
463+
}
464+
},
465+
{
466+
"id": "8_bfloat16_none_all_parallel",
467+
"args": {
468+
"dtype": "bfloat16",
469+
"nthread_per_process": 8,
470+
"num_iteration": 10,
471+
"batch_size": 40,
472+
"total_batch_size": 5120,
473+
"tensor_parallel": 2,
474+
"sequence_parallel": true,
475+
"pipeline_parallel": 2,
476+
"virtual_pipeline_parallel": 2,
477+
"learning_rate": 0.00001,
478+
"lr_decay_style": "none"
479+
}
480+
}
481+
]
482+
},
307483
{
308484
"tag": "lora",
309485
"tests": [

0 commit comments

Comments
 (0)