Skip to content

Commit a467c0d

Browse files
authored
Merge pull request #77 from fuzziecoder/codex/implement-elasticsearch-for-logs-and-analytics
Add Redis service layer and Elasticsearch log indexing/search; update config and monitoring APIs
2 parents b3710fa + fc361f9 commit a467c0d

11 files changed

Lines changed: 384 additions & 2 deletions

File tree

pipeline/README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ A production-ready pipeline automation system built with:
77
- **Redis** - State management, locks, caching
88
- **PostgreSQL** - Persistence
99
- **AI Safety Module** - Failure prediction & anomaly handling
10+
- **Elasticsearch** - Execution log indexing, fast filtering, analytics
1011
- **BentoML + Feast + Kubeflow** - End-to-end model infrastructure
1112
- **Prometheus + Grafana** - Metrics collection and dashboards
1213
- **ELK Stack (Elasticsearch, Logstash, Kibana)** - Centralized logging
@@ -228,6 +229,8 @@ curl -X POST http://localhost:8000/api/executions/pipeline-xxx/execute
228229
|----------|---------|-------------|
229230
| `DATABASE_URL` | - | PostgreSQL connection string |
230231
| `REDIS_URL` | `redis://localhost:6379/0` | Redis connection string |
232+
| `ELASTICSEARCH_URL` | `http://localhost:9200` | Elasticsearch connection string |
233+
| `ELASTICSEARCH_LOGS_INDEX` | `flexiroaster-execution-logs` | Logs index for search |
231234
| `EXECUTOR_MAX_RETRIES` | `3` | Max retries per stage |
232235
| `EXECUTOR_STAGE_TIMEOUT` | `120` | Stage timeout in seconds |
233236
| `AI_BLOCK_HIGH_RISK` | `false` | Block high-risk executions |
@@ -289,8 +292,9 @@ pipeline/
289292
|-----------|----------------|
290293
| **Airflow** | Scheduling, retries, dependencies |
291294
| **FastAPI** | Business logic, execution, AI safety |
292-
| **Redis** | Locks, caching, real-time state |
293-
| **PostgreSQL** | History, definitions, logs |
295+
| **Redis** | Locks, caching, real-time state, rate limiting, sessions, job queue |
296+
| **PostgreSQL** | History, definitions, canonical execution records |
297+
| **Elasticsearch** | Execution log indexing, fast search, filtering, analytics |
294298

295299
### Fail-Safe Design
296300

pipeline/backend/api/routes/health.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from db.database import check_database_health
1818
from core.redis_state import redis_state_manager
1919
from config import settings
20+
from core.elasticsearch_client import elasticsearch_manager
2021

2122
logger = logging.getLogger(__name__)
2223

@@ -38,6 +39,7 @@ async def health_check(
3839
- Application
3940
- Database
4041
- Redis
42+
- Elasticsearch
4143
"""
4244
services = {}
4345
overall_status = "healthy"
@@ -63,6 +65,15 @@ async def health_check(
6365
services["redis"].details["note"] = "Running in fallback mode"
6466
else:
6567
overall_status = "degraded"
68+
69+
# Check Elasticsearch
70+
elastic_health = await elasticsearch_manager.health_check()
71+
services["elasticsearch"] = ServiceHealth(
72+
status=elastic_health.get("status", "unknown"),
73+
details=elastic_health
74+
)
75+
if elastic_health.get("status") not in ["healthy", "disabled"]:
76+
overall_status = "degraded"
6677

6778
return HealthResponse(
6879
status=overall_status,

pipeline/backend/api/routes/monitoring.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from fastapi import APIRouter, HTTPException
66

77
from ai.monitoring_engine import monitoring_engine
8+
from core.elasticsearch_client import elasticsearch_manager
9+
from core.redis_services import redis_service_layer
810

911
router = APIRouter(prefix="/monitoring", tags=["monitoring"])
1012

@@ -51,3 +53,57 @@ async def get_monitoring_snapshot(pipeline_id: str):
5153
},
5254
"generated_at": snapshot.generated_at.isoformat(),
5355
}
56+
57+
58+
@router.get("/logs/search", response_model=Dict[str, Any])
async def search_execution_logs(
    query: str,
    pipeline_id: str | None = None,
    execution_id: str | None = None,
    level: str | None = None,
    limit: int = 100,
):
    """Search indexed execution logs in Elasticsearch for fast filtering and analytics.

    Args:
        query: Full-text query matched against log message and metadata.
        pipeline_id: Optional exact-match filter on pipeline id.
        execution_id: Optional exact-match filter on execution id.
        level: Optional single log level to filter by.
        limit: Maximum number of hits to return; clamped to 1..1000.
    """
    # Clamp limit so a caller cannot request an unbounded (or negative)
    # result set from Elasticsearch.
    limit = max(1, min(limit, 1000))
    levels = [level] if level else None
    results = await elasticsearch_manager.search_logs(
        query=query,
        pipeline_id=pipeline_id,
        execution_id=execution_id,
        levels=levels,
        limit=limit,
    )
    return {"query": query, "count": len(results), "results": results}
76+
77+
78+
@router.post("/cache/session/{session_id}", response_model=Dict[str, Any])
async def cache_session(session_id: str, payload: Dict[str, Any]):
    """Store session payload in Redis-backed session storage."""
    stored = await redis_service_layer.set_session(session_id, payload)
    return {"success": stored, "session_id": session_id}
83+
84+
85+
@router.get("/cache/session/{session_id}", response_model=Dict[str, Any])
async def fetch_session(session_id: str):
    """Fetch session payload from Redis-backed session storage."""
    stored_session = await redis_service_layer.get_session(session_id)
    return {"session_id": session_id, "session": stored_session}
90+
91+
92+
@router.post("/rate-limit/{identifier}", response_model=Dict[str, Any])
async def check_rate_limit(identifier: str):
    """Apply Redis-backed rate limiting check for an identifier."""
    verdict = await redis_service_layer.check_rate_limit(identifier)
    return verdict
96+
97+
98+
@router.post("/jobs/enqueue", response_model=Dict[str, Any])
async def enqueue_background_job(payload: Dict[str, Any]):
    """Push a background job payload into Redis list-based broker queue."""
    queued = await redis_service_layer.enqueue_job(payload)
    return {"success": queued}
103+
104+
105+
@router.post("/jobs/dequeue", response_model=Dict[str, Any])
async def dequeue_background_job():
    """Pop a background job payload from Redis broker queue."""
    next_job = await redis_service_layer.dequeue_job()
    return {"job": next_job, "found": next_job is not None}

pipeline/backend/config.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ class Settings(BaseSettings):
5353
REDIS_SOCKET_TIMEOUT: float = 5.0
5454
REDIS_RETRY_ON_TIMEOUT: bool = True
5555
REDIS_DECODE_RESPONSES: bool = True
56+
REDIS_SESSION_TTL: int = 86400 # 24 hours
57+
REDIS_RATE_LIMIT_WINDOW_SECONDS: int = 60
58+
REDIS_RATE_LIMIT_MAX_REQUESTS: int = 120
59+
60+
# Redis queue/broker settings
61+
REDIS_JOB_QUEUE_KEY: str = "flexiroaster:jobs:default"
62+
REDIS_JOB_QUEUE_TIMEOUT: int = 5
5663

5764
# Execution Lock Settings
5865
EXECUTION_LOCK_TTL: int = 3600 # 1 hour
@@ -95,6 +102,17 @@ class Settings(BaseSettings):
95102
LOG_LEVEL: str = "INFO"
96103
LOG_FORMAT: str = "json" # "json" or "text"
97104
LOG_FILE: Optional[str] = None
105+
106+
# ===================
107+
# Elasticsearch Settings
108+
# ===================
109+
ELASTICSEARCH_ENABLED: bool = True
110+
ELASTICSEARCH_URL: str = "http://localhost:9200"
111+
ELASTICSEARCH_USERNAME: Optional[str] = None
112+
ELASTICSEARCH_PASSWORD: Optional[str] = None
113+
ELASTICSEARCH_VERIFY_CERTS: bool = True
114+
ELASTICSEARCH_LOGS_INDEX: str = "flexiroaster-execution-logs"
115+
ELASTICSEARCH_REQUEST_TIMEOUT: int = 10
98116

99117
# ===================
100118
# Observability Settings
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
"""
2+
Elasticsearch client utilities for FlexiRoaster.
3+
Handles log indexing and search/filter operations for execution analytics.
4+
"""
5+
import logging
6+
from typing import Optional, Dict, Any, List
7+
8+
from elasticsearch import AsyncElasticsearch
9+
10+
from config import settings
11+
12+
logger = logging.getLogger(__name__)
13+
14+
15+
class ElasticsearchManager:
    """Manages the Elasticsearch client lifecycle and execution-log indexing.

    Degrades gracefully: when Elasticsearch is disabled or unreachable, all
    operations become safe no-ops (indexing returns False, search returns []),
    so the rest of the application keeps working without log analytics.
    """

    def __init__(self) -> None:
        # Client is created lazily in initialize(); None while disconnected.
        self._client: Optional[AsyncElasticsearch] = None
        self._available: bool = False

    @property
    def is_available(self) -> bool:
        """True once initialize() succeeded and close() has not been called."""
        return self._available

    async def initialize(self) -> bool:
        """Connect to Elasticsearch and make sure the logs index exists.

        Returns:
            True when the cluster is reachable and the index is ready;
            False when disabled via config or the cluster is unreachable.
            Never raises — failures are logged and the manager stays unavailable.
        """
        if not settings.ELASTICSEARCH_ENABLED:
            logger.info("Elasticsearch disabled via configuration")
            self._available = False
            return False

        try:
            auth = None
            if settings.ELASTICSEARCH_USERNAME and settings.ELASTICSEARCH_PASSWORD:
                auth = (settings.ELASTICSEARCH_USERNAME, settings.ELASTICSEARCH_PASSWORD)

            self._client = AsyncElasticsearch(
                hosts=[settings.ELASTICSEARCH_URL],
                basic_auth=auth,
                verify_certs=settings.ELASTICSEARCH_VERIFY_CERTS,
                request_timeout=settings.ELASTICSEARCH_REQUEST_TIMEOUT,
            )

            # ping() verifies connectivity before we attempt index management.
            await self._client.ping()
            await self.ensure_index()

            self._available = True
            logger.info("Elasticsearch initialized")
            return True
        except Exception as e:
            logger.warning(f"Elasticsearch unavailable, continuing without indexing: {e}")
            self._available = False
            return False

    async def close(self) -> None:
        """Close the client connection and mark the manager unavailable."""
        if self._client:
            await self._client.close()
            # Drop the reference so a later health_check() reports
            # "disconnected" instead of calling into a closed client.
            self._client = None
        self._available = False

    async def ensure_index(self) -> None:
        """Create the execution-logs index with explicit mappings if it is missing."""
        if not self._client:
            return

        index_name = settings.ELASTICSEARCH_LOGS_INDEX
        exists = await self._client.indices.exists(index=index_name)
        if exists:
            return

        await self._client.indices.create(
            index=index_name,
            mappings={
                "properties": {
                    "timestamp": {"type": "date"},
                    "execution_id": {"type": "keyword"},
                    "pipeline_id": {"type": "keyword"},
                    "stage_id": {"type": "keyword"},
                    "level": {"type": "keyword"},
                    "message": {"type": "text"},
                    "metadata": {"type": "object", "enabled": True},
                }
            },
        )

    async def health_check(self) -> Dict[str, Any]:
        """Report Elasticsearch health for the /health endpoint.

        Returns a dict whose "status" is one of:
        "disabled", "disconnected", "healthy", or "unhealthy".
        """
        if not settings.ELASTICSEARCH_ENABLED:
            return {"status": "disabled"}

        if not self._client:
            return {"status": "disconnected"}

        try:
            health = await self._client.cluster.health()
            return {
                "status": "healthy",
                "cluster_status": health.get("status"),
                "number_of_nodes": health.get("number_of_nodes"),
            }
        except Exception as e:
            return {"status": "unhealthy", "error": str(e)}

    async def index_execution_log(self, document: Dict[str, Any]) -> bool:
        """Index one execution-log document; best-effort, never raises.

        Returns True when the document was accepted, False when Elasticsearch
        is unavailable or the indexing call failed.
        """
        if not self._available or not self._client:
            return False

        try:
            await self._client.index(index=settings.ELASTICSEARCH_LOGS_INDEX, document=document)
            return True
        except Exception as e:
            logger.warning(f"Failed to index log in Elasticsearch: {e}")
            return False

    async def search_logs(
        self,
        query: str,
        pipeline_id: Optional[str] = None,
        execution_id: Optional[str] = None,
        levels: Optional[List[str]] = None,
        limit: int = 100,
    ) -> List[Dict[str, Any]]:
        """Full-text search over indexed execution logs, newest first.

        Args:
            query: Text matched against the message and metadata fields.
            pipeline_id: Optional exact-match filter on pipeline id.
            execution_id: Optional exact-match filter on execution id.
            levels: Optional list of log levels to filter by.
            limit: Maximum number of hits to return.

        Returns:
            A list of log documents ([] when unavailable or on search failure,
            mirroring the best-effort contract of index_execution_log).
        """
        if not self._available or not self._client:
            return []

        filters: List[Dict[str, Any]] = []
        if pipeline_id:
            filters.append({"term": {"pipeline_id": pipeline_id}})
        if execution_id:
            filters.append({"term": {"execution_id": execution_id}})
        if levels:
            filters.append({"terms": {"level": levels}})

        try:
            # Use 8.x-style top-level kwargs (consistent with the mappings=
            # kwarg in ensure_index) instead of the deprecated body= parameter.
            response = await self._client.search(
                index=settings.ELASTICSEARCH_LOGS_INDEX,
                size=limit,
                query={
                    "bool": {
                        "must": [{"multi_match": {"query": query, "fields": ["message", "metadata.*"]}}],
                        "filter": filters,
                    }
                },
                sort=[{"timestamp": {"order": "desc"}}],
            )
        except Exception as e:
            # Search must not take the monitoring API down with it.
            logger.warning(f"Failed to search logs in Elasticsearch: {e}")
            return []

        return [hit.get("_source", {}) for hit in response.get("hits", {}).get("hits", [])]


elasticsearch_manager = ElasticsearchManager()
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""
2+
Redis-backed services for sessions, rate limiting, and background job brokering.
3+
"""
4+
import json
5+
from datetime import datetime
6+
from typing import Any, Dict, Optional
7+
8+
from config import settings
9+
from core.redis_state import redis_state_manager
10+
11+
12+
class RedisServiceLayer:
    """Redis-backed application services: session storage, fixed-window rate
    limiting, and a simple list-based background-job queue.

    Every method short-circuits to a safe result (False / None / permissive
    verdict) when Redis is unavailable, so callers never crash on an outage.
    """

    SESSION_KEY = "flexiroaster:session:{session_id}"
    RATE_LIMIT_KEY = "flexiroaster:ratelimit:{identifier}:{window}"

    async def set_session(self, session_id: str, payload: Dict[str, Any], ttl: Optional[int] = None) -> bool:
        """Serialize and store a session payload with a TTL (config default if omitted)."""
        if not redis_state_manager.is_available:
            return False

        expiry = ttl or settings.REDIS_SESSION_TTL
        session_key = self.SESSION_KEY.format(session_id=session_id)
        await redis_state_manager.client.set(session_key, json.dumps(payload), ex=expiry)
        return True

    async def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Load and deserialize a session payload; None when missing or Redis is down."""
        if not redis_state_manager.is_available:
            return None

        raw = await redis_state_manager.client.get(self.SESSION_KEY.format(session_id=session_id))
        return json.loads(raw) if raw else None

    async def check_rate_limit(
        self,
        identifier: str,
        max_requests: Optional[int] = None,
        window_seconds: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Fixed-window rate limit check keyed by identifier.

        Returns a verdict dict; permissive ("allowed": True, "remaining": -1)
        when Redis is unavailable.
        """
        window = window_seconds or settings.REDIS_RATE_LIMIT_WINDOW_SECONDS
        if not redis_state_manager.is_available:
            return {"allowed": True, "remaining": -1, "window_seconds": window}

        limit = max_requests or settings.REDIS_RATE_LIMIT_MAX_REQUESTS
        # Bucket the clock into fixed windows so the counter key rolls over.
        bucket = int(datetime.now().timestamp() // window)
        counter_key = self.RATE_LIMIT_KEY.format(identifier=identifier, window=bucket)

        used = await redis_state_manager.client.incr(counter_key)
        if used == 1:
            # First hit in this window: start the expiry clock on the counter.
            await redis_state_manager.client.expire(counter_key, window)

        return {
            "allowed": used <= limit,
            "remaining": max(limit - used, 0),
            "window_seconds": window,
            "used": used,
            "limit": limit,
        }

    async def enqueue_job(self, payload: Dict[str, Any], queue_key: Optional[str] = None) -> bool:
        """Push a JSON-serialized job onto the broker list (LPUSH)."""
        if not redis_state_manager.is_available:
            return False

        await redis_state_manager.client.lpush(
            queue_key or settings.REDIS_JOB_QUEUE_KEY, json.dumps(payload)
        )
        return True

    async def dequeue_job(self, queue_key: Optional[str] = None, timeout: Optional[int] = None) -> Optional[Dict[str, Any]]:
        """Blocking-pop one job from the broker list (BRPOP); None on timeout."""
        if not redis_state_manager.is_available:
            return None

        popped = await redis_state_manager.client.brpop(
            queue_key or settings.REDIS_JOB_QUEUE_KEY,
            timeout=timeout or settings.REDIS_JOB_QUEUE_TIMEOUT,
        )
        # BRPOP yields a (key, value) pair, or None when the wait timed out.
        return json.loads(popped[1]) if popped else None


redis_service_layer = RedisServiceLayer()

0 commit comments

Comments
 (0)