This guide explains how to integrate fine-tuned models from this lab into the SynthoraAI AI-Gov-Content-Curator project.
The LLM Fine-Tuning Lab is designed to seamlessly integrate with the SynthoraAI backend for:
- Article summarization
- Content classification
- Bias detection
- Question answering
from src.data import DatasetBuilder
# Create the dataset builder, pointing it at the SynthoraAI backend API
builder = DatasetBuilder(
api_url="https://ai-content-curator-backend.vercel.app"
)
# Build the summarization dataset
# NOTE(review): limit=5000 presumably caps the number of articles fetched — confirm against DatasetBuilder
dataset = builder.build_summarization_dataset(limit=5000)
# Save for training
dataset.save("datasets/processed/gov_articles.json")

python scripts/prepare_data.py \
--task summarization \
--api-url https://ai-content-curator-backend.vercel.app \
--limit 5000 \
--output datasets/processed/gov_articles.json

python scripts/train_summarizer.py \
--config configs/summarization.yaml \
--data datasets/processed/gov_articles.json \
--output checkpoints/synthoraai-summarizer

python scripts/train_classifier.py \
--config configs/classification.yaml \
--data datasets/processed/labeled_articles.json \
--output checkpoints/synthoraai-classifier

Export models for production deployment:
# Export to ONNX (recommended for SynthoraAI)
python scripts/export.py \
--model checkpoints/synthoraai-summarizer \
--output exports/summarizer \
--format onnx \
--quantize
# Export to TorchScript
python scripts/export.py \
--model checkpoints/synthoraai-summarizer \
--output exports/summarizer.pt \
--format torchscript

Update backend/utils/aiSummarizer.js:
const ort = require('onnxruntime-node');
/**
 * Summarizer backed by a fine-tuned model exported to ONNX.
 *
 * The inference session is created lazily: `summarize()` loads the model on
 * first use, so callers that never call `initialize()` themselves do not hit
 * a null session.
 */
class FineTunedSummarizer {
  /**
   * @param {string} modelPath - Path to the exported ONNX model file.
   */
  constructor(modelPath) {
    this.session = null;
    this.modelPath = modelPath;
    // In-flight initialization, shared so concurrent callers create one session.
    this.loading = null;
  }

  /**
   * Creates the ONNX Runtime inference session for `modelPath`.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    this.session = await ort.InferenceSession.create(this.modelPath);
  }

  /**
   * Summarizes `content` by tokenizing it, running the model and decoding
   * the model output.
   *
   * @param {string} content - Text to summarize.
   * @param {string} [instructions=''] - Accepted for interface compatibility;
   *   not used by this implementation.
   * @returns {Promise<*>} Whatever `decode()` produces for the model output.
   * @throws {Error} If the model cannot be loaded, or if `tokenize()` /
   *   `decode()` have not been implemented.
   */
  async summarize(content, instructions = '') {
    if (!this.session) {
      if (!this.loading) {
        // Clear the marker when done so a failed load can be retried.
        this.loading = this.initialize().finally(() => {
          this.loading = null;
        });
      }
      await this.loading;
    }

    // Prepare input
    const inputs = this.tokenize(content);
    // Run inference
    const outputs = await this.session.run(inputs);
    // Decode output
    return this.decode(outputs);
  }

  /**
   * Converts text into the model's input feeds.
   *
   * @param {string} text
   * @throws {Error} Always, until tokenization is implemented.
   */
  tokenize(text) {
    // Implement tokenization
    // Load tokenizer config from exports/summarizer/tokenizer.json
    throw new Error('FineTunedSummarizer.tokenize() is not implemented');
  }

  /**
   * Converts the model outputs into a summary.
   *
   * @param {object} outputs - Result of `session.run()`.
   * @throws {Error} Always, until decoding is implemented.
   */
  decode(outputs) {
    // Implement decoding
    throw new Error('FineTunedSummarizer.decode() is not implemented');
  }
}
module.exports = { FineTunedSummarizer };

Use fine-tuned model as primary with Google AI as fallback:
const { FineTunedSummarizer } = require('./fineTunedSummarizer');
const { GoogleGenerativeAI } = require('@google/generative-ai');
class HybridSummarizer {
constructor() {
this.fineTuned = new FineTunedSummarizer('exports/summarizer/model.onnx');
this.googleAI = new GoogleGenerativeAI(process.env.GOOGLE_AI_API_KEY);
}
async summarize(content) {
try {
// Try fine-tuned model first
return await this.fineTuned.summarize(content);
} catch (error) {
console.log('Falling back to Google AI:', error);
// Fallback to Google AI
const model = this.googleAI.getGenerativeModel({ model: 'gemini-pro' });
const result = await model.generateContent(content);
return result.response.text();
}
}
}Create backend/api/models/route.js:
import { NextResponse } from 'next/server';
import { FineTunedSummarizer } from '@/utils/fineTunedSummarizer';
const summarizer = new FineTunedSummarizer('exports/summarizer/model.onnx');
export async function POST(request) {
const { content } = await request.json();
try {
const summary = await summarizer.summarize(content);
return NextResponse.json({
success: true,
summary,
model: 'fine-tuned-flan-t5',
});
} catch (error) {
return NextResponse.json(
{ success: false, error: error.message },
{ status: 500 }
);
}
}Add to SynthoraAI backend .env:
# Fine-Tuned Models
FINETUNED_MODEL_PATH=./models/summarizer/model.onnx
FINETUNED_TOKENIZER_PATH=./models/summarizer/tokenizer.json
USE_FINETUNED_MODEL=true
# Fallback
GOOGLE_AI_API_KEY=your_key_here
FALLBACK_TO_GOOGLE_AI=true

Implement caching for model inference:
const redis = require('redis');
const client = redis.createClient();
async function getCachedSummary(articleId, content) {
// Check cache
const cached = await client.get(`summary:${articleId}`);
if (cached) return JSON.parse(cached);
// Generate summary
const summary = await summarizer.summarize(content);
// Cache result
await client.setEx(
`summary:${articleId}`,
3600, // 1 hour
JSON.stringify(summary)
);
return summary;
}Process multiple articles efficiently:
async function batchSummarize(articles) {
const batchSize = 8;
const results = [];
for (let i = 0; i < articles.length; i += batchSize) {
const batch = articles.slice(i, i + batchSize);
const summaries = await Promise.all(
batch.map(article => summarizer.summarize(article.content))
);
results.push(...summaries);
}
return results;
}Track model performance:
const metrics = {
requests: 0,
errors: 0,
avgLatency: 0,
};
async function monitoredSummarize(content) {
const start = Date.now();
metrics.requests++;
try {
const summary = await summarizer.summarize(content);
const latency = Date.now() - start;
// Update average latency
metrics.avgLatency =
(metrics.avgLatency * (metrics.requests - 1) + latency) /
metrics.requests;
return summary;
} catch (error) {
metrics.errors++;
throw error;
}
}Deploy the fine-tuned model with Vercel:
- Add model files to public/models/
- Create edge function:
// api/summarize/route.js
export const config = {
runtime: 'edge',
};
export default async function handler(req) {
const { content } = await req.json();
// Load model (cached in edge runtime)
const summary = await summarize(content);
return new Response(JSON.stringify({ summary }), {
headers: { 'content-type': 'application/json' },
});
}Test the integration:
// __tests__/integration/finetuned-model.test.js
describe('Fine-Tuned Model Integration', () => {
it('should summarize articles correctly', async () => {
const article = {
content: 'Test article content...',
};
const summary = await summarizer.summarize(article.content);
expect(summary).toBeDefined();
expect(summary.length).toBeGreaterThan(0);
expect(summary.length).toBeLessThan(article.content.length);
});
it('should fallback to Google AI on error', async () => {
// Mock fine-tuned model error
jest.spyOn(fineTuned, 'summarize').mockRejectedValue(new Error('Model error'));
const summary = await hybridSummarizer.summarize('Test content');
expect(summary).toBeDefined();
// Verify Google AI was called
});
});- Always test models before deploying to production
- Monitor performance and latency
- Implement fallbacks for reliability
- Cache results to reduce inference costs
- Version your models for easy rollback
- Log predictions for quality monitoring
try {
await model.initialize();
} catch (error) {
console.error('Model loading failed:', error);
// Check file paths, permissions, dependencies
}- Use quantized models (8-bit or 4-bit)
- Implement request batching
- Enable caching
- Consider edge deployment
- Evaluate on test set
- Compare with baseline (Google AI)
- Monitor user feedback
- Retrain with more data if needed
For integration support:
- GitHub Issues: Create an issue
- Email: hoangson091104@gmail.com
- Documentation: Full docs