"""
Executable scripts for training and deployment.

This package contains command-line scripts:
- production_train.py: Main training script with CLI
- Additional utility scripts

It also defines parse_common_args(), which builds the argument parser
shared by those scripts.
"""

# Public API of this package. An empty list would declare that the package
# exports nothing, hiding the helper below from ``from <package> import *``.
__all__ = ['parse_common_args']


# Script utilities
def parse_common_args():
    """Build the argument parser shared by the command-line scripts.

    Despite the name, nothing is parsed here: the function only constructs
    and returns the parser, so callers can register extra script-specific
    options before calling ``parse_args()`` themselves.

    Registered options (attribute name on the parsed namespace in brackets):

    Training:
        --batch-size [batch_size]: int, default 32.
        --epochs [epochs]: int, default 10.
        --lr [lr]: float, default 0.001.

    Distributed:
        --strategy [strategy]: 'ddp' or 'fsdp', default 'ddp'.
        --mixed-precision [mixed_precision]: flag, False unless given.

    Checkpoint:
        --checkpoint-dir [checkpoint_dir]: str, default './checkpoints'.
        --resume [resume]: str, default None.

    Returns:
        argparse.ArgumentParser: a new parser instance on every call.
    """
    # Function-local import, as in the original: argparse is only loaded
    # when a parser is actually requested.
    import argparse

    parser = argparse.ArgumentParser(description='Distributed Training Framework')

    # Training arguments
    parser.add_argument('--batch-size', type=int, default=32, help='Batch size per GPU')
    parser.add_argument('--epochs', type=int, default=10, help='Number of epochs')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')

    # Distributed arguments
    parser.add_argument('--strategy', type=str, default='ddp',
                        choices=['ddp', 'fsdp'], help='Distributed strategy')
    parser.add_argument('--mixed-precision', action='store_true',
                        help='Enable mixed precision training')

    # Checkpoint arguments
    parser.add_argument('--checkpoint-dir', type=str, default='./checkpoints',
                        help='Checkpoint directory')
    parser.add_argument('--resume', type=str, default=None,
                        help='Resume from checkpoint')

    return parser
0 commit comments