-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtrain_hf_models.sh
More file actions
executable file
·162 lines (144 loc) · 4.5 KB
/
train_hf_models.sh
File metadata and controls
executable file
·162 lines (144 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/bin/bash
# Train HuggingFace Models for Public Release
# This script trains high-quality models (one per author) to low loss for HF deployment
# Abort the script as soon as any unguarded command exits non-zero.
set -e

# ANSI colour sequences used by the print_* helpers. They are stored as
# literal backslash escapes (not raw ESC bytes), so they only take effect
# when emitted through an escape-expanding printer such as `echo -e`.
_csi='\033['
RED="${_csi}0;31m"
GREEN="${_csi}0;32m"
YELLOW="${_csi}1;33m"
BLUE="${_csi}0;34m"
NC="${_csi}0m" # "no colour": resets every attribute set above
unset _csi
# Print an informational message on stdout, prefixed with a blue [INFO] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded (echo -e)
print_info() {
  local msg="$1"
  echo -e "${BLUE}[INFO]${NC} ${msg}"
}
# Print a success message on stdout, prefixed with a green [SUCCESS] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded (echo -e)
print_success() {
  local prefix="${GREEN}[SUCCESS]${NC}"
  echo -e "${prefix} $1"
}
# Print a warning message on stdout, prefixed with a yellow [WARNING] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded (echo -e)
print_warning() {
  local text=$1
  echo -e "${YELLOW}[WARNING]${NC} ${text}"
}
# Print an error message on stderr, prefixed with a red [ERROR] tag.
# Errors go to stderr so they remain visible (and separable) when the
# script's stdout is redirected to a log or captured by a caller.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded (echo -e)
print_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Default values (each can be overridden by the matching command-line option)
TRAIN_AUTHOR=""
TRAIN_ALL=false
TARGET_LOSS=0.1
OUTPUT_DIR="models_hf"
MAX_EPOCHS=50000

# Print the usage/help text on stdout.
show_usage() {
  echo "Usage: $0 [OPTIONS]"
  echo ""
  echo "Train high-quality models for HuggingFace deployment"
  echo ""
  echo "Options:"
  echo " --author NAME Train single author"
  # Training is always dispatched to the multiprocessing trainer, so the
  # authors are trained in parallel, not one after another.
  echo " --all Train all 8 authors (in parallel)"
  echo " --target-loss LOSS Target training loss (default: 0.1)"
  echo " --output-dir DIR Output directory (default: models_hf)"
  echo " --max-epochs N Maximum epochs (default: 50000)"
  echo " -h, --help Show this help"
  echo ""
  echo "Examples:"
  echo " $0 --author baum # Train Baum model to loss 0.1"
  echo " $0 --all --target-loss 0.05 # Train all to loss 0.05"
  echo " $0 --author austen # Train Austen model"
}

# Exit with a clear message when a value-taking option is the last argument.
# Without this check `shift 2` fails and, under `set -e`, the script would
# terminate without printing anything.
# Arguments: $1 - option name as typed
#            $2 - number of arguments remaining, counting the option itself
require_value() {
  if [ "$2" -lt 2 ]; then
    print_error "Option $1 requires a value"
    echo "Use --help for usage information"
    exit 1
  fi
}

# Parse command-line options into the globals declared above.
# Globals:   TRAIN_AUTHOR, TRAIN_ALL, TARGET_LOSS, OUTPUT_DIR, MAX_EPOCHS (written)
# Arguments: the script's arguments ("$@")
# Exits:     0 after printing help; 1 on an unknown option or a missing value
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --author)
        require_value "$1" "$#"
        TRAIN_AUTHOR="$2"
        shift 2
        ;;
      --all)
        TRAIN_ALL=true
        shift
        ;;
      --target-loss)
        require_value "$1" "$#"
        TARGET_LOSS="$2"
        shift 2
        ;;
      --output-dir)
        require_value "$1" "$#"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      --max-epochs)
        require_value "$1" "$#"
        MAX_EPOCHS="$2"
        shift 2
        ;;
      -h|--help)
        show_usage
        exit 0
        ;;
      *)
        print_error "Unknown option: $1"
        echo "Use --help for usage information"
        exit 1
        ;;
    esac
  done
}

# Parse arguments
parse_args "$@"
# Validate arguments: refuse to continue when neither --all nor a non-empty
# --author value was supplied, since there would be nothing to train.
if [[ "$TRAIN_ALL" == false && -z "$TRAIN_AUTHOR" ]]; then
  print_error "Must specify --author or --all"
  printf '%s\n' "Use --help for usage information"
  exit 1
fi
# Startup banner, followed by the effective settings for this run.
printf '%s\n' \
  "==================================================" \
  " HuggingFace Model Training" \
  "==================================================" \
  ""
print_info "Target loss: $TARGET_LOSS"
print_info "Output directory: $OUTPUT_DIR"
print_info "Max epochs: $MAX_EPOCHS"
printf '\n'
# Activate conda environment
# Requires a `conda` executable on PATH; every failure below prints an error
# and exits with status 1.
if ! command -v conda &> /dev/null; then
  print_error "conda not found. Please install Miniconda or Anaconda."
  exit 1
fi
print_info "Activating conda environment..."
# Capture the hook script before evaluating it. `eval "$(cmd)"` returns 0 when
# cmd fails without output (eval of an empty string succeeds), which would
# hide an initialization failure until the activate step below.
if ! conda_hook="$(conda shell.bash hook)" || [[ -z "$conda_hook" ]]; then
  print_error "Failed to initialize conda"
  exit 1
fi
eval "$conda_hook" 2>/dev/null || {
  print_error "Failed to initialize conda"
  exit 1
}
unset conda_hook
# stderr from the activation is suppressed; the messages below replace it
conda activate llm-stylometry 2>/dev/null || {
  print_error "Failed to activate llm-stylometry environment"
  print_info "Run: ./run_llm_stylometry.sh (to set up environment)"
  exit 1
}
# Build author list
# AUTHORS remains a space-separated string, as before; author_list holds the
# same names as an array so they can be passed and counted safely.
if [ "$TRAIN_ALL" = true ]; then
  AUTHORS="austen baum dickens fitzgerald melville thompson twain wells"
  print_info "Training all 8 authors in parallel"
else
  AUTHORS="$TRAIN_AUTHOR"
  print_info "Training: $TRAIN_AUTHOR"
fi
echo
# Split on whitespace only. Unlike a bare, unquoted $AUTHORS this never
# glob-expands characters such as * or ? in a user-supplied author name.
read -r -a author_list <<< "$AUTHORS"

# Always use multiprocessing script for parallel GPU training
print_info "Using multiprocessing for parallel GPU training..."
# NOTE(review): OUTPUT_DIR is accepted on the command line and reported in the
# summary, but it is not forwarded to the trainer here — confirm whether
# train_all_hf_models.py takes an output-directory option.
if python code/train_all_hf_models.py "${author_list[@]}" \
  --target-loss "$TARGET_LOSS" \
  --max-epochs "$MAX_EPOCHS" \
  --max-gpus 8; then
  # The trainer's exit status is all-or-nothing from this script's point of
  # view, so every requested author is counted as trained (or, below, failed).
  TRAINED_COUNT=${#author_list[@]}
  FAILED_COUNT=0
  print_success "Training completed successfully"
else
  TRAINED_COUNT=0
  FAILED_COUNT=${#author_list[@]}
  print_error "Training failed"
fi
# Summary
printf '%s\n' \
  "==================================================" \
  " Summary" \
  "==================================================" \
  "✓ Models trained: $TRAINED_COUNT"
# The failure line is printed only when at least one model failed.
if [ "$FAILED_COUNT" -gt 0 ]; then
  printf '%s\n' "✗ Failed: $FAILED_COUNT"
fi
printf '\n%s\n\n' "Models saved to: $OUTPUT_DIR"

# The exit status mirrors the outcome: 1 when nothing was trained, else 0.
if ! [ "$TRAINED_COUNT" -gt 0 ]; then
  print_error "No models were trained successfully"
  exit 1
fi
print_success "Training complete!"
printf '%s\n' \
  "" \
  "Next steps:" \
  "1. Verify model quality: python -c 'from transformers import GPT2LMHeadModel; ...'" \
  "2. Generate text samples to check quality" \
  "3. Upload to HuggingFace: ./upload_to_huggingface.sh"
exit 0