-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathmulticlass_classification.py
More file actions
174 lines (144 loc) · 5.84 KB
/
multiclass_classification.py
File metadata and controls
174 lines (144 loc) · 5.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""
Multi-class Text Classification Example
This example demonstrates multi-class text classification using
torchTextClassifiers for sentiment analysis with
3 classes: positive, negative, and neutral.
"""
import os
import random
import warnings
import numpy as np
import torch
from pytorch_lightning import seed_everything
from torchTextClassifiers import ModelConfig, TrainingConfig, torchTextClassifiers
from torchTextClassifiers.tokenizers import WordPieceTokenizer
def main():
# Set seed for reproducibility
SEED = 42
# Set environment variables for full reproducibility
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
# Use PyTorch Lightning's seed_everything for comprehensive seeding
seed_everything(SEED, workers=True)
# Make PyTorch operations deterministic
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True, warn_only=True)
# Suppress PyTorch Lightning warnings for cleaner output
warnings.filterwarnings(
'ignore',
message='.*',
category=UserWarning,
module='pytorch_lightning'
)
print("🎭 Multi-class Text Classification Example")
print("=" * 50)
# Create multi-class sample data (3 classes: 0=negative, 1=neutral, 2=positive)
print("📝 Creating multi-class sentiment data...")
X_train = np.array([
# Negative examples (class 0)
"This product is terrible and I hate it completely.",
"Worst purchase ever. Total waste of money.",
"Absolutely awful quality. Very disappointed.",
"Poor service and terrible product quality.",
"I regret buying this. Complete failure.",
# Neutral examples (class 1)
"The product is okay, nothing special though.",
"It works but could be better designed.",
"Average quality for the price point.",
"Not bad but not great either.",
"It's fine, meets basic expectations.",
# Positive examples (class 2)
"Excellent product! Highly recommended!",
"Amazing quality and great customer service.",
"Perfect! Exactly what I was looking for.",
"Outstanding value and excellent performance.",
"Love it! Will definitely buy again."
])
y_train = np.array([0, 0, 0, 0, 0, # negative
1, 1, 1, 1, 1, # neutral
2, 2, 2, 2, 2]) # positive
# Validation data
X_val = np.array([
"Bad quality, not recommended.", # negative
"It's okay, does the job.", # neutral
"Great product, very satisfied!" # positive
])
y_val = np.array([0, 1, 2])
# Test data
X_test = np.array([
"This is absolutely horrible!",
"It's an average product, nothing more.",
"Fantastic! Love every aspect of it!",
"Really poor design and quality.",
"Works well, good value for money.",
"Outstanding product with amazing features!"
])
y_test = np.array([0, 1, 2, 0, 1, 2])
print(f"Training samples: {len(X_train)}")
print(f"Class distribution: Negative={sum(y_train==0)}, Neutral={sum(y_train==1)}, Positive={sum(y_train==2)}")
# Create and train tokenizer
print("\n🏗️ Creating and training WordPiece tokenizer...")
tokenizer = WordPieceTokenizer(vocab_size=5000, output_dim=128)
training_corpus = X_train.tolist()
tokenizer.train(training_corpus)
print("✅ Tokenizer trained successfully!")
# Create model configuration for 3 classes
print("\n🔧 Creating model configuration...")
model_config = ModelConfig(
embedding_dim=64,
num_classes=3 # 3 classes for sentiment (negative, neutral, positive)
)
# Create classifier
print("\n🔨 Creating multi-class classifier...")
classifier = torchTextClassifiers(
tokenizer=tokenizer,
model_config=model_config
)
print("✅ Classifier created successfully!")
# Train the model
print("\n🎯 Training model...")
training_config = TrainingConfig(
num_epochs=30,
batch_size=8,
lr=1e-3,
patience_early_stopping=7,
num_workers=0,
trainer_params={'deterministic': True}
)
classifier.train(
X_train, y_train,
training_config=training_config,
X_val=X_val, y_val=y_val,
verbose=True
)
print("✅ Training completed!")
# Make predictions
print("\n🔮 Making predictions...")
result = classifier.predict(X_test)
predictions = result["prediction"].squeeze().numpy()
print(f"Predictions: {predictions}")
print(f"True labels: {y_test}")
# Calculate accuracy
accuracy = (predictions == y_test).mean()
print(f"Test accuracy: {accuracy:.3f}")
# Define class names for better output
class_names = ["Negative", "Neutral", "Positive"]
# Show detailed results
print("\n📊 Detailed Results:")
print("-" * 60)
correct_predictions = 0
for i, (text, pred, true) in enumerate(zip(X_test, predictions, y_test)):
predicted_sentiment = class_names[pred]
true_sentiment = class_names[true]
correct = pred == true
if correct:
correct_predictions += 1
status = "✅" if correct else "❌"
print(f"{i+1}. {status} Predicted: {predicted_sentiment}, True: {true_sentiment}")
print(f" Text: {text}")
print()
print(f"Final Accuracy: {correct_predictions}/{len(X_test)} = {correct_predictions/len(X_test):.3f}")
print("\n🎉 Multi-class example completed successfully!")
if __name__ == "__main__":
main()