https-deeplearning-ai
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎Learner Tooling/database-viewer‎
Lines changed: 0 additions & 1 deletion b/‎Learner Tooling/database-viewer‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎Learner Tooling/database-viewer/.env.example‎
Lines changed: 5 additions & 0 deletions b/‎Learner Tooling/database-viewer/.env.example‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎Learner Tooling/database-viewer/.gitignore‎
Lines changed: 35 additions & 0 deletions b/‎Learner Tooling/database-viewer/.gitignore‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎Learner Tooling/database-viewer/README.md‎
Lines changed: 117 additions & 0 deletions b/‎Learner Tooling/database-viewer/README.md‎
Lines changed: 117 additions & 0 deletions
diff --git a/‎Learner Tooling/database-viewer/app.py‎
Lines changed: 81 additions & 0 deletions b/‎Learner Tooling/database-viewer/app.py‎
Lines changed: 81 additions & 0 deletions
diff --git a/‎Learner Tooling/database-viewer/config.py‎
Lines changed: 113 additions & 0 deletions b/‎Learner Tooling/database-viewer/config.py‎
Lines changed: 113 additions & 0 deletions
diff --git a/‎Learner Tooling/database-viewer/models/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎Learner Tooling/database-viewer/models/__init__.py‎
Lines changed: 4 additions & 0 deletions
@@ -107,3 +107,6 @@ yarn-error.log*
 Thumbs.db
 ehthumbs.db
 Desktop.ini
+
+# Large media assets — fetch from upstream course repos instead (see each course's README)
+*.mp4
@@ -0,0 +1,5 @@
+# Copy this file to .env and fill in your values
+OPENAI_API_KEY=your-openai-api-key-here
+FLASK_ENV=development
+SECRET_KEY=change-this-in-production
+CHROMA_PERSIST_DIR=./chroma_data
@@ -0,0 +1,35 @@
+# Environment variables (never commit secrets)
+.env
+.env.local
+.env.*.local
+
+# ChromaDB data
+chroma_data/
+
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+.Python
+*.egg-info/
+dist/
+build/
+.eggs/
+*.egg
+
+# Virtual environments
+venv/
+.venv/
+env/
+ENV/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
@@ -0,0 +1,117 @@
+# ChromaDB Code Search UI
+
+A Flask web application for searching, browsing, and visualizing Python code using semantic embeddings and ChromaDB. Built as a companion app for the **Context Engineering with Chroma** course.
+
+## Purpose
+
+This app serves two roles in the course:
+
+1. **Teaching material** — Students ingest this codebase into ChromaDB using AST-based chunking pipelines they build in the labs. The well-structured Python code (models, services, routes, utils) makes it an ideal target for practicing chunking strategies.
+2. **Interactive tool** — Once ingested, students launch this app to explore their collections, run searches, and see how their chunking and metadata decisions affect retrieval quality.
+
+## Features
+
+- **Semantic search** — Natural language queries over code using OpenAI embeddings (`text-embedding-3-small`)
+- **Regex search** — Structural pattern matching across the codebase with analysis and explanation
+- **Collection explorer** — Paginated chunk browser with filters by file path, chunk type, and symbol name
+- **Code statistics** — Construct detection, size distributions, and symbol rankings
+- **Embedding visualizer** — 2D PCA projections of chunk embeddings to explore clustering
+- **Smart suggestions** — Context-aware query suggestions based on collection metadata
+- **Query history and bookmarks** — Persistent search history with color-coded bookmarks
+- **Interactive tutorials** — Guided tours with spotlight overlays for onboarding
+
+## Project Structure
+
+```
+app/
+├── app.py                  # Flask application factory and entry point
+├── config.py               # Dataclass-based configuration (env vars, defaults)
+├── requirements.txt        # Python dependencies
+├── .env.example            # Environment variable template
+│
+├── models/                 # Data models
+│   ├── chunk.py            # Chunk, ChunkMetadata, ChunkType
+│   ├── search_result.py    # SearchResult, SearchResultSet, ResultFormatter
+│   └── query_history.py    # QueryRecord, Bookmark, HistoryManager
+│
+├── routes/                 # Flask blueprints (one per feature)
+│   ├── search.py           # Semantic and regex search endpoints
+│   ├── collections.py      # Collection CRUD and ingestion triggers
+│   ├── explorer.py         # Paginated chunk browsing with filters
+│   ├── similarity.py       # Pairwise similarity matrix computation
+│   ├── history.py          # Query history and bookmarks API
+│   ├── regex_tester.py     # Regex testing and analysis
+│   ├── suggestions.py      # Smart query suggestions
+│   ├── statistics.py       # Code metrics and analytics
+│   ├── visualizer.py       # 2D embedding visualization
+│   └── tutorial.py         # Interactive guided tours
+│
+├── services/               # Business logic layer
+│   ├── chroma_client.py    # ChromaDB connection manager (singleton)
+│   ├── search_service.py   # Search strategies (semantic + regex)
+│   ├── collection_service.py   # Collection management and stats
+│   ├── ingestion_service.py    # AST parsing and code chunking pipeline
+│   ├── similarity_service.py   # Vector similarity computations
+│   ├── statistics_service.py   # Code metrics and analysis
+│   ├── visualization_service.py # PCA and random projection reducers
+│   ├── suggestion_service.py   # Multi-strategy suggestion generator
+│   └── tutorial_service.py     # Tutorial builder and manager
+│
+├── utils/                  # Utilities and helpers
+│   ├── validators.py       # Input validation (queries, paths, regex)
+│   ├── regex_engine.py     # Regex analysis and human-readable explanation
+│   ├── code_parser.py      # Lightweight regex-based Python parser
+│   ├── text_splitter.py    # Token-based text splitting
+│   └── formatters.py       # Display formatting (scores, code, paths)
+│
+├── templates/              # Jinja2 HTML templates
+│   ├── base.html           # Base layout with navbar and tutorial engine
+│   ├── index.html          # Dashboard (collection cards)
+│   ├── search.html         # Search interface
+│   ├── explorer.html       # Chunk browser
+│   └── collection.html     # Collection detail page
+│
+└── static/
+    └── css/style.css       # Custom styles
+```
+
+## Design Patterns
+
+The codebase intentionally demonstrates several software design patterns, making it a richer target for code search exercises:
+
+- **Strategy** — `SearchStrategy`, `SimilarityComputer`, `DimensionReducer`, `SuggestionStrategy`
+- **Singleton** — `ChromaClientManager` for a single DB connection
+- **Factory** — `get_reducer()`, `get_similarity_computer()`, `get_tutorial_builder()`
+- **Builder** — Tutorial builders (`DashboardTutorialBuilder`, `CollectionTutorialBuilder`)
+- **Facade** — `SearchService`, `SuggestionService`, `StatisticsService` wrapping multiple strategies
+
+## Setup
+
+1. Install dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+2. Configure environment variables (copy `.env.example` to `.env`):
+   ```
+   OPENAI_API_KEY=sk-your-key-here
+   CHROMA_PERSIST_DIR=./chroma_data
+   ```
+
+3. Run the app:
+   ```bash
+   python app.py
+   ```
+
+## Dependencies
+
+| Package | Purpose |
+|---------|---------|
+| flask | Web framework |
+| chromadb | Vector database |
+| openai | Embedding API |
+| tiktoken | Token counting |
+| tree-sitter | AST parsing |
+| tree-sitter-python | Python grammar for tree-sitter |
+| python-dotenv | Environment variable management |
+| pathspec | `.gitignore` pattern matching |
@@ -0,0 +1,81 @@
+"""Flask application entry point for the ChromaDB Code Search UI."""
+
+import os
+import sys
+import logging
+from flask import Flask, jsonify
+from dotenv import load_dotenv
+
+# Ensure the app directory is on the Python path, so that the top-level
+# `config` and `routes` imports below resolve regardless of the current
+# working directory.
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+from config import get_config
+from routes import register_blueprints
+
+
+class ReverseProxied:
+    """WSGI middleware that sets SCRIPT_NAME from an environment variable.
+
+    When Flask runs behind a reverse proxy at a URL prefix (e.g. /flask/),
+    this middleware tells Flask about the prefix so that url_for() generates
+    correct URLs. Set SCRIPT_NAME=/flask in the environment to activate.
+    """
+
+    def __init__(self, app, script_name=""):
+        self.app = app
+        self.script_name = script_name
+
+    def __call__(self, environ, start_response):
+        if self.script_name:
+            environ["SCRIPT_NAME"] = self.script_name
+            path_info = environ.get("PATH_INFO", "")
+            if path_info.startswith(self.script_name):
+                environ["PATH_INFO"] = path_info[len(self.script_name):]
+        return self.app(environ, start_response)
+
+
+def create_app() -> Flask:
+    """Application factory: create and configure the Flask app."""
+    load_dotenv()
+    config = get_config()
+
+    app = Flask(__name__)
+    app.secret_key = config.secret_key
+
+    # Support running behind a reverse proxy with a URL prefix
+    script_name = os.environ.get("SCRIPT_NAME", "")
+    if script_name:
+        app.wsgi_app = ReverseProxied(app.wsgi_app, script_name=script_name)
+
+    # Configure logging
+    logging.basicConfig(
+        level=logging.DEBUG if config.debug else logging.INFO,
+        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    )
+
+    # Register all route blueprints
+    register_blueprints(app)
+
+    # Error handlers
+    @app.errorhandler(404)
+    def not_found(error):
+        return jsonify({"error": "Not found"}), 404
+
+    @app.errorhandler(500)
+    def internal_error(error):
+        return jsonify({"error": "Internal server error"}), 500
+
+    app.logger.info(
+        f"ChromaDB UI started in {config.environment.value} mode "
+        f"on {config.host}:{config.port}"
+    )
+
+    return app
+
+
+# Module-level application instance, built at import time so the app
+# object is importable from this module under the name ``app``.
+app = create_app()
+
+
+if __name__ == "__main__":
+    # Direct execution: read the config again for host/port/debug and
+    # start Flask's built-in server.
+    # NOTE(review): in development mode this runs with debug=True on the
+    # configured host (default "0.0.0.0") — confirm that exposure is intended.
+    config = get_config()
+    app.run(host=config.host, port=config.port, debug=config.debug)
@@ -0,0 +1,113 @@
+"""Application configuration with environment-aware settings."""
+
+import os
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Optional
+from pathlib import Path
+
+
+class Environment(Enum):
+    """Application environment modes.
+
+    Member values are the lowercase strings accepted for the ``FLASK_ENV``
+    environment variable, which ``AppConfig.environment`` is built from.
+    """
+    DEVELOPMENT = "development"
+    PRODUCTION = "production"
+    TESTING = "testing"
+
+
+class EmbeddingModel(Enum):
+    """Supported embedding models for ChromaDB."""
+    # OpenAI embedding model identifiers
+    OPENAI_SMALL = "text-embedding-3-small"
+    OPENAI_LARGE = "text-embedding-3-large"
+    # NOTE(review): presumably selects ChromaDB's built-in embedding
+    # function — confirm against the code that consumes this enum.
+    DEFAULT = "default"
+
+
+@dataclass
+class ChromaConfig:
+    """Configuration for ChromaDB connection."""
+    persist_directory: str = "./chroma_data"
+    default_collection: str = "code_collection"
+    embedding_model: EmbeddingModel = EmbeddingModel.OPENAI_SMALL
+    batch_size: int = 100
+    max_results: int = 20
+
+    def __post_init__(self):
+        """Ensure persist directory exists."""
+        Path(self.persist_directory).mkdir(parents=True, exist_ok=True)
+
+
+@dataclass
+class SearchConfig:
+    """Configuration for search behavior.
+
+    Holds plain default values only; how each limit is enforced is decided
+    by the code that reads this config (not visible in this module).
+    """
+    default_n_results: int = 10
+    max_n_results: int = 50
+    min_query_length: int = 2
+    max_query_length: int = 500
+    # NOTE(review): presumably the number of decimal places for displayed
+    # scores — confirm against the formatter that uses it.
+    score_precision: int = 4
+    regex_max_results: int = 100
+    regex_timeout_seconds: float = 5.0
+
+
+@dataclass
+class IngestionConfig:
+    """Configuration for the code ingestion pipeline.
+
+    Holds plain default values only; the pipeline that consumes them is
+    not visible in this module.
+    """
+    max_tokens_per_chunk: int = 1000
+    # One-element tuple: the trailing comma is required.
+    supported_extensions: tuple = (".py",)
+    ignore_patterns: tuple = ("__pycache__", ".git", ".env", "node_modules")
+    batch_size: int = 100
+    # NOTE(review): presumably a model name and a fallback encoding name
+    # handed to the tokenizer — confirm against the ingestion code.
+    tokenizer_model: str = "text-embedding-3-small"
+    fallback_encoding: str = "cl100k_base"
+
+
+@dataclass
+class ExportConfig:
+    """Configuration for the export service.
+
+    NOTE(review): no export service is visible from this module — confirm
+    this config still has a consumer.
+    """
+    default_format: str = "json"
+    # The default format above is one of these entries.
+    supported_formats: tuple = ("json", "csv")
+    max_export_chunks: int = 10000
+
+
+@dataclass
+class DiffConfig:
+    """Configuration for the collection diff service.
+
+    NOTE(review): no diff service is visible from this module — confirm
+    this config still has a consumer.
+    """
+    # NOTE(review): presumably a similarity score cutoff in [0, 1] —
+    # confirm the metric and comparison direction in the consumer.
+    similarity_threshold: float = 0.98
+    max_diff_results: int = 50
+    include_modified_by_default: bool = True
+
+
+@dataclass
+class AppConfig:
+    """Root application configuration combining all sub-configs."""
+    environment: Environment = field(default_factory=lambda: Environment(
+        os.getenv("FLASK_ENV", "development")
+    ))
+    secret_key: str = field(default_factory=lambda:
+        os.getenv("SECRET_KEY", "dev-secret-key-change-in-production")
+    )
+    host: str = "0.0.0.0"
+    port: int = 5000
+    debug: bool = field(init=False)
+    openai_api_key: Optional[str] = field(default_factory=lambda:
+        os.getenv("OPENAI_API_KEY")
+    )
+    chroma: ChromaConfig = field(default_factory=ChromaConfig)
+    search: SearchConfig = field(default_factory=SearchConfig)
+    ingestion: IngestionConfig = field(default_factory=IngestionConfig)
+    export: ExportConfig = field(default_factory=ExportConfig)
+    diff: DiffConfig = field(default_factory=DiffConfig)
+
+    def __post_init__(self):
+        self.debug = self.environment == Environment.DEVELOPMENT
+
+    @classmethod
+    def from_environment(cls) -> "AppConfig":
+        """Factory: build config from environment variables."""
+        return cls(
+            chroma=ChromaConfig(
+                persist_directory=os.getenv("CHROMA_PERSIST_DIR", "./chroma_data"),
+            ),
+        )
+
+
+def get_config() -> AppConfig:
+    """Get the current application configuration."""
+    return AppConfig.from_environment()
@@ -0,0 +1,4 @@
+"""Data models for the ChromaDB UI application."""
+
+from models.chunk import Chunk, ChunkType, ChunkMetadata
+from models.search_result import SearchResult, SearchResultSet, SortOrder, ResultFormatter