Synthora-AI
diff --git a/‎.env.example‎
Lines changed: 39 additions & 0 deletions b/‎.env.example‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 82 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 82 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 99 additions & 0 deletions b/‎.gitignore‎
Lines changed: 99 additions & 0 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 137 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 137 additions & 0 deletions
@@ -0,0 +1,39 @@
+# Environment variables for LLM Fine-Tuning Lab
+#
+# Template only: every `your_..._here` value below is a placeholder. Keep real
+# values in a local `.env` file (which the repository's .gitignore excludes),
+# never in this example file.
+
+# API Keys
+GOOGLE_AI_API_KEY=your_google_ai_api_key_here
+OPENAI_API_KEY=your_openai_api_key_here
+HUGGINGFACE_TOKEN=your_huggingface_token_here
+
+# SynthoraAI Backend
+SYNTHORAAI_API_URL=https://ai-content-curator-backend.vercel.app
+# Default points at a MongoDB instance on localhost.
+MONGODB_URI=mongodb://localhost:27017/synthoraai
+
+# Weights & Biases
+WANDB_API_KEY=your_wandb_api_key_here
+WANDB_PROJECT=synthoraai-finetuning
+WANDB_ENTITY=your_wandb_entity
+
+# Model Paths
+MODEL_CACHE_DIR=./models
+CHECKPOINT_DIR=./checkpoints
+DATA_DIR=./datasets
+
+# Training Configuration
+USE_GPU=true
+GPU_DEVICE=0
+BATCH_SIZE=8
+LEARNING_RATE=5e-5
+NUM_EPOCHS=3
+
+# Logging
+LOG_LEVEL=INFO
+LOG_FILE=logs/training.log
+
+# Redis (optional)
+REDIS_HOST=localhost
+REDIS_PORT=6379
+
+# Pinecone (for vector search)
+PINECONE_API_KEY=your_pinecone_api_key_here
+PINECONE_ENV=your_pinecone_environment_here
@@ -0,0 +1,82 @@
+# Continuous integration: lint, test across a Python version matrix, then
+# build (without pushing) the Docker image.
+name: CI/CD Pipeline
+
+# Runs on pushes to, and pull requests targeting, main and develop.
+on:
+  push:
+    branches: [ main, develop ]
+  pull_request:
+    branches: [ main, develop ]
+
+jobs:
+  # Static checks only: formatting (Black), linting (Flake8), import order
+  # (isort) and type checking (mypy). Project requirements are not installed
+  # in this job, so mypy is told to ignore imports it cannot resolve.
+  lint:
+    name: Code Quality
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black flake8 isort mypy
+
+      # NOTE(review): Black defaults to 88 columns while Flake8 below allows
+      # 120; confirm the project config sets Black's line length to match.
+      - name: Run Black
+        run: black --check src/ scripts/
+
+      - name: Run Flake8
+        run: flake8 src/ scripts/ --max-line-length=120
+
+      - name: Run isort
+        run: isort --check-only src/ scripts/
+
+      # mypy was installed above but never executed; run it so type checking
+      # is actually part of the quality gate.
+      - name: Run mypy
+        run: mypy src/ scripts/ --ignore-missing-imports
+
+  # Runs the pytest suite with coverage on each Python version in the matrix
+  # and uploads the XML coverage report to Codecov.
+  test:
+    name: Run Tests
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so '3.10' is not parsed as the float 3.1.
+        python-version: ['3.9', '3.10', '3.11']
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          # Reuse pip's download cache between runs (keyed on requirements.txt).
+          cache: pip
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest pytest-cov
+
+      - name: Run tests
+        run: |
+          pytest tests/ --cov=src --cov-report=xml --cov-report=term
+
+      - name: Upload coverage
+        uses: codecov/codecov-action@v4
+        with:
+          # codecov-action v4 expects an upload token for non-fork runs;
+          # without it uploads are rejected, and fail_ci_if_error: false
+          # below would hide that failure.
+          token: ${{ secrets.CODECOV_TOKEN }}
+          file: ./coverage.xml
+          fail_ci_if_error: false
+
+  # Checks that the Docker image builds; starts only after lint and test
+  # have both succeeded.
+  build-docker:
+    name: Build Docker Image
+    runs-on: ubuntu-latest
+    needs: [lint, test]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          # Build-only check: the image is tagged but never pushed.
+          push: false
+          tags: synthoraai/llm-finetuning-lab:latest
+          # Read from and write to the GitHub Actions cache backend.
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
@@ -0,0 +1,99 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+
+# Jupyter
+# NOTE(review): `*.ipynb` ignores every notebook in the repository, while
+# CONTRIBUTING.md lists "Create tutorial notebooks" as a wanted contribution.
+# Confirm whether tutorial notebooks need a negation rule.
+.ipynb_checkpoints
+*.ipynb
+
+# Model checkpoints
+checkpoints/
+/models/
+*.pth
+*.pt
+*.bin
+*.onnx
+*.pb
+
+# Data
+datasets/
+/data/
+*.csv
+*.jsonl
+*.parquet
+
+# Allow source code files
+# JSON is deliberately not ignored: the former `*.json` rule was always
+# cancelled by a later `!*.json`, so the pair has been dropped without
+# changing which files git tracks. Negations cannot re-include files that
+# live under an excluded directory (e.g. datasets/), so those stay ignored.
+!src/**/*.py
+!configs/*.yaml
+
+# Logs
+logs/
+*.log
+runs/
+tensorboard/
+
+# Outputs
+outputs/
+exports/
+benchmarks/
+
+# Environment variables
+.env
+.env.local
+.env.*.local
+
+# Testing
+.coverage
+.pytest_cache/
+htmlcov/
+.tox/
+
+# Documentation
+docs/_build/
+site/
+
+# Cache
+.cache/
+*.cache
+
+# Wandb
+wandb/
+
+# Misc
+*.bak
+*.tmp
+.benchmarks
@@ -0,0 +1,137 @@
+# Contributing to LLM Fine-Tuning Lab
+
+Thank you for your interest in contributing to the LLM Fine-Tuning Lab! This document provides guidelines and instructions for contributing.
+
+## Getting Started
+
+1. **Fork the repository**
+2. **Clone your fork**
+   ```bash
+   git clone https://github.com/YOUR_USERNAME/LLM-Finetuning-Lab.git
+   cd LLM-Finetuning-Lab
+   ```
+
+3. **Set up development environment**
+   ```bash
+   make dev-install
+   ```
+
+4. **Create a feature branch**
+   ```bash
+   git checkout -b feature/your-feature-name
+   ```
+
+## Development Guidelines
+
+### Code Style
+
+We follow PEP 8 style guidelines with some modifications:
+
+- Maximum line length: 120 characters
+- Use type hints for function signatures
+- Write docstrings for all public functions and classes
+
+Format your code before committing:
+
+```bash
+make format
+```
+
+### Testing
+
+All new features must include tests. Run the test suite before opening a pull request:
+
+```bash
+# Run tests
+make test
+
+# Run specific test file
+pytest tests/test_training.py
+```
+
+### Commit Messages
+
+Follow the [Conventional Commits](https://www.conventionalcommits.org/) format:
+
+```
+<type>(<scope>): <subject>
+
+<body>
+
+<footer>
+```
+
+Types:
+- `feat`: New feature
+- `fix`: Bug fix
+- `docs`: Documentation changes
+- `style`: Code style changes
+- `refactor`: Code refactoring
+- `test`: Adding tests
+- `chore`: Maintenance tasks
+
+Example:
+```
+feat(training): add LoRA fine-tuning support
+
+Implemented LoRA-based fine-tuning for efficient parameter
+adaptation. Includes configuration options and example scripts.
+
+Closes #123
+```
+
+## Pull Request Process
+
+1. **Update documentation** if you're adding new features
+2. **Add tests** for new functionality
+3. **Run linters** and ensure all tests pass
+   ```bash
+   make lint
+   make test
+   ```
+4. **Update CHANGELOG.md** with your changes
+5. **Submit PR** with a clear description
+
+### PR Checklist
+
+- [ ] Code follows style guidelines
+- [ ] Tests added and passing
+- [ ] Documentation updated
+- [ ] CHANGELOG.md updated
+- [ ] Commit messages follow convention
+
+## Areas for Contribution
+
+### High Priority
+
+- [ ] Implement RLHF training pipeline
+- [ ] Add support for multimodal models
+- [ ] Optimize distributed training
+- [ ] Improve documentation
+
+### Medium Priority
+
+- [ ] Add more evaluation metrics
+- [ ] Create tutorial notebooks
+- [ ] Implement model distillation
+- [ ] Add CI/CD improvements
+
+### Good First Issues
+
+Look for issues labeled `good-first-issue` in the issue tracker.
+
+## Code Review Process
+
+1. At least one maintainer review is required
+2. All CI checks must pass
+3. Documentation must be updated
+4. Changes must be tested
+
+## Questions?
+
+Feel free to:
+- Open an issue for bugs or feature requests
+- Join our Discord community
+- Email us at hoangson091104@gmail.com
+
+Thank you for contributing to SynthoraAI!