# Architecture Diagrams

This document contains visual representations of the CloudMon Metrics Processor architecture using Mermaid diagrams.

System Architecture

High-Level Component Diagram

graph TB
    subgraph External["External Systems"]
        TSDB["Graphite<br/>(Time-Series Database)"]
        Dashboard["Status Dashboard<br/>(Atlassian Statuspage)"]
        Grafana["Grafana<br/>(Visualization)"]
    end
    
    subgraph MetricsProcessor["CloudMon Metrics Processor"]
        subgraph Convertor["Convertor Binary"]
            API["HTTP API<br/>(Axum)"]
            GraphiteModule["Graphite Module<br/>(TSDB Client)"]
            Common["Common Module<br/>(Evaluation Logic)"]
            Config["Config Module<br/>(YAML Parser)"]
            Types["Types Module<br/>(Domain Models)"]
        end
        
        subgraph Reporter["Reporter Binary"]
            Poller["Metric Poller<br/>(60s interval)"]
            Notifier["Dashboard Notifier<br/>(JWT Auth)"]
        end
    end
    
    TSDB -->|"Raw Metrics"| GraphiteModule
    GraphiteModule --> Common
    Common --> API
    API -->|"Health Data"| Grafana
    API -->|"Health Data"| Poller
    Poller --> Notifier
    Notifier -->|"Status Updates"| Dashboard
    
    Config --> Types
    Types --> Common
    Types --> GraphiteModule
    
    style Convertor fill:#e1f5fe
    style Reporter fill:#fff3e0
    style External fill:#f5f5f5
Loading

Component Interactions

graph LR
    subgraph Inputs["Inputs"]
        ConfigFile["config.yaml"]
        EnvVars["Environment Variables"]
        RawMetrics["Raw TSDB Metrics"]
    end
    
    subgraph Processing["Processing"]
        Parse["Parse Config"]
        Template["Apply Templates"]
        Query["Query TSDB"]
        Evaluate["Evaluate Flags"]
        Combine["Combine to Health"]
    end
    
    subgraph Outputs["Outputs"]
        RESTAPI["REST API Response"]
        GrafanaData["Grafana Datapoints"]
        StatusUpdate["Dashboard Update"]
    end
    
    ConfigFile --> Parse
    EnvVars --> Parse
    Parse --> Template
    Template --> Query
    RawMetrics --> Query
    Query --> Evaluate
    Evaluate --> Combine
    Combine --> RESTAPI
    Combine --> GrafanaData
    RESTAPI --> StatusUpdate
Loading

Module Dependency Graph

Library Crate Dependencies

graph TD
    subgraph lib["cloudmon_metrics (lib)"]
        lib_rs["lib.rs<br/>(re-exports)"]
    end
    
    subgraph modules["Core Modules"]
        api["api.rs<br/>(module declaration)"]
        api_v1["api/v1.rs<br/>(REST handlers)"]
        config["config.rs<br/>(YAML + env parsing)"]
        types["types.rs<br/>(domain types)"]
        graphite["graphite.rs<br/>(TSDB client)"]
        common["common.rs<br/>(shared utils)"]
    end
    
    subgraph binaries["Binaries"]
        convertor["bin/convertor.rs"]
        reporter["bin/reporter.rs"]
    end
    
    lib_rs --> api
    lib_rs --> config
    lib_rs --> types
    lib_rs --> graphite
    lib_rs --> common
    
    api --> api_v1
    api_v1 --> common
    api_v1 --> types
    
    graphite --> common
    graphite --> types
    
    common --> types
    common --> graphite
    
    types --> config
    
    convertor --> lib_rs
    reporter --> lib_rs
    reporter --> api_v1
    
    style lib fill:#e8f5e9
    style modules fill:#e3f2fd
    style binaries fill:#fce4ec
Loading

External Crate Dependencies

graph TB
    subgraph Application["Application"]
        Convertor["Convertor"]
        Reporter["Reporter"]
    end
    
    subgraph WebFramework["Web Framework"]
        Axum["axum 0.6"]
        Tower["tower-http"]
        Hyper["hyper"]
    end
    
    subgraph AsyncRuntime["Async Runtime"]
        Tokio["tokio"]
        TokioSignal["tokio::signal"]
    end
    
    subgraph HTTP["HTTP Client"]
        Reqwest["reqwest"]
    end
    
    subgraph Serialization["Serialization"]
        Serde["serde"]
        SerdeJson["serde_json"]
        SerdeYaml["config (yaml)"]
    end
    
    subgraph Evaluation["Expression Evaluation"]
        Evalexpr["evalexpr"]
    end
    
    subgraph Auth["Authentication"]
        JWT["jwt"]
        HMAC["hmac + sha2"]
    end
    
    subgraph Utilities["Utilities"]
        Chrono["chrono"]
        Regex["regex"]
        Uuid["uuid"]
        Tracing["tracing"]
    end
    
    Convertor --> Axum
    Convertor --> Tower
    Convertor --> Tokio
    Convertor --> Reqwest
    Convertor --> Serde
    Convertor --> Evalexpr
    Convertor --> Tracing
    
    Reporter --> Tokio
    Reporter --> Reqwest
    Reporter --> Serde
    Reporter --> JWT
    Reporter --> HMAC
    Reporter --> Tracing
    
    Axum --> Hyper
    Axum --> Tower
    Axum --> Tokio
    
    style Application fill:#bbdefb
    style WebFramework fill:#c8e6c9
    style AsyncRuntime fill:#fff9c4
Loading

Deployment Architecture

Standalone Deployment

graph TB
    subgraph Host["Host / Container"]
        subgraph ConvertorProcess["cloudmon-metrics-convertor"]
            Server["HTTP Server<br/>:3000"]
            ConvConfig["config.yaml"]
        end
        
        subgraph ReporterProcess["cloudmon-metrics-reporter"]
            Poller["Polling Loop"]
            RepConfig["config.yaml"]
        end
    end
    
    subgraph External["External Services"]
        Graphite["Graphite TSDB"]
        StatusDB["Status Dashboard"]
    end
    
    Server -->|"HTTP GET /render"| Graphite
    Poller -->|"HTTP GET /api/v1/health"| Server
    Poller -->|"HTTP POST /v1/component_status"| StatusDB
    
    ConvConfig -.->|"reads"| Server
    RepConfig -.->|"reads"| Poller
    
    style Host fill:#e0f2f1
    style ConvertorProcess fill:#b3e5fc
    style ReporterProcess fill:#ffe0b2
Loading

Kubernetes Deployment

graph TB
    subgraph K8s["Kubernetes Cluster"]
        subgraph NSMetrics["Namespace: metrics"]
            subgraph DeployConv["Deployment: convertor"]
                Pod1["Pod<br/>convertor"]
                Pod2["Pod<br/>convertor"]
            end
            
            SvcConv["Service: convertor<br/>ClusterIP:3000"]
            
            subgraph DeployRep["Deployment: reporter"]
                Pod3["Pod<br/>reporter"]
            end
            
            CM["ConfigMap<br/>config.yaml"]
            Secret["Secret<br/>JWT credentials"]
        end
    end
    
    subgraph External["External"]
        Graphite["Graphite TSDB"]
        StatusDB["Status Dashboard"]
        Ingress["Ingress Controller"]
    end
    
    SvcConv --> Pod1
    SvcConv --> Pod2
    Pod3 -->|"polls"| SvcConv
    
    Graphite --> Pod1
    Graphite --> Pod2
    Pod3 --> StatusDB
    
    CM -.-> Pod1
    CM -.-> Pod2
    CM -.-> Pod3
    Secret -.-> Pod3
    
    Ingress -.->|"optional"| SvcConv
    
    style K8s fill:#f3e5f5
    style NSMetrics fill:#e1bee7
    style DeployConv fill:#b3e5fc
    style DeployRep fill:#ffe0b2
Loading

High Availability Setup

graph TB
    subgraph Region1["Region 1"]
        LB1["Load Balancer"]
        subgraph Convertor1["Convertor Cluster"]
            C1a["Convertor A"]
            C1b["Convertor B"]
        end
        R1["Reporter"]
    end
    
    subgraph Region2["Region 2"]
        LB2["Load Balancer"]
        subgraph Convertor2["Convertor Cluster"]
            C2a["Convertor A"]
            C2b["Convertor B"]
        end
        R2["Reporter"]
    end
    
    subgraph Shared["Shared Services"]
        TSDB["Graphite<br/>(Replicated)"]
        Dashboard["Status Dashboard"]
    end
    
    LB1 --> C1a
    LB1 --> C1b
    R1 --> LB1
    
    LB2 --> C2a
    LB2 --> C2b
    R2 --> LB2
    
    TSDB --> C1a
    TSDB --> C1b
    TSDB --> C2a
    TSDB --> C2b
    
    R1 --> Dashboard
    R2 --> Dashboard
    
    style Region1 fill:#e3f2fd
    style Region2 fill:#fff3e0
    style Shared fill:#e8f5e9
Loading

Request Flow Diagrams

Health API Request Flow

sequenceDiagram
    participant Client
    participant API as API Layer<br/>(api/v1.rs)
    participant Common as Common Module<br/>(common.rs)
    participant Graphite as Graphite Module<br/>(graphite.rs)
    participant TSDB as Graphite TSDB
    
    Client->>API: GET /api/v1/health?service=svc&env=prod
    API->>API: Validate query params
    API->>Common: get_service_health(svc, env, from, to)
    Common->>Common: Load health_metric config
    Common->>Graphite: get_graphite_data(queries)
    Graphite->>TSDB: GET /render?target=...
    TSDB-->>Graphite: JSON datapoints
    Graphite-->>Common: Vec#lt;GraphiteData#gt;
    Common->>Common: Evaluate flag metrics
    Common->>Common: Build expression context
    Common->>Common: Evaluate health expressions
    Common-->>API: ServiceHealthData
    API-->>Client: JSON response
Loading

Reporter Notification Flow

sequenceDiagram
    participant Timer as Tokio Timer
    participant Reporter as Reporter
    participant Convertor as Convertor API
    participant Dashboard as Status Dashboard
    
    loop Every 60 seconds
        Timer->>Reporter: tick
        Reporter->>Reporter: For each environment
        Reporter->>Reporter: For each service
        Reporter->>Convertor: GET /api/v1/health
        Convertor-->>Reporter: ServiceHealthResponse
        
        alt Health status > 0 (degraded/outage)
            Reporter->>Dashboard: POST /v1/component_status
            Dashboard-->>Reporter: 200 OK
        end
    end
Loading

Data Structure Diagrams

Configuration Hierarchy

graph TD
    subgraph Config["Config Structure"]
        Root["Config"]
        Root --> DS["datasource"]
        Root --> Server["server"]
        Root --> Templates["metric_templates"]
        Root --> Envs["environments"]
        Root --> Flags["flag_metrics"]
        Root --> Health["health_metrics"]
        Root --> Status["status_dashboard"]
        
        DS --> DS_URL["url: String"]
        DS --> DS_Timeout["timeout: u16"]
        
        Server --> Server_Addr["address: String"]
        Server --> Server_Port["port: u16"]
        
        Templates --> Tmpl["Template"]
        Tmpl --> Tmpl_Query["query: String"]
        Tmpl --> Tmpl_Op["op: CmpType"]
        Tmpl --> Tmpl_Thresh["threshold: f32"]
        
        Envs --> Env["EnvironmentDef"]
        Env --> Env_Name["name: String"]
        Env --> Env_Attrs["attributes: HashMap"]
        
        Flags --> Flag["FlagMetricDef"]
        Flag --> Flag_Name["name: String"]
        Flag --> Flag_Service["service: String"]
        Flag --> Flag_Template["template: TemplateRef"]
        Flag --> Flag_Envs["environments: Vec"]
        
        Health --> HM["ServiceHealthDef"]
        HM --> HM_Service["service: String"]
        HM --> HM_Category["category: String"]
        HM --> HM_Metrics["metrics: Vec#lt;String#gt;"]
        HM --> HM_Exprs["expressions: Vec"]
        
        Status --> Status_URL["url: String"]
        Status --> Status_Secret["secret: Option#lt;String#gt;"]
    end
    
    style Config fill:#e8eaf6
Loading

AppState Structure

graph LR
    subgraph AppState["AppState (Runtime)"]
        AS_Config["config: Config"]
        AS_Templates["metric_templates:<br/>HashMap#lt;String, BinaryMetricRawDef#gt;"]
        AS_Client["req_client:<br/>reqwest::Client"]
        AS_Flags["flag_metrics:<br/>HashMap#lt;String, HashMap#lt;String, FlagMetric#gt;#gt;"]
        AS_Health["health_metrics:<br/>HashMap#lt;String, ServiceHealthDef#gt;"]
        AS_Envs["environments:<br/>Vec#lt;EnvironmentDef#gt;"]
        AS_Services["services:<br/>HashSet#lt;String#gt;"]
    end
    
    subgraph Lookup["Lookup Pattern"]
        Query["Query: env=prod, service=api"]
        FlagLookup["flag_metrics.get('api.latency')?.get('prod')"]
        HealthLookup["health_metrics.get('api')"]
    end
    
    Query --> FlagLookup
    Query --> HealthLookup
    FlagLookup --> AS_Flags
    HealthLookup --> AS_Health
    
    style AppState fill:#e0f7fa
    style Lookup fill:#fff8e1
Loading

Error Handling Flow

graph TD
    subgraph Errors["CloudMonError Types"]
        E1["ServiceNotSupported"]
        E2["EnvNotSupported"]
        E3["ExpressionError"]
        E4["GraphiteError"]
    end
    
    subgraph Handling["Error Handling"]
        Check1{"Service in config?"}
        Check2{"Env for service?"}
        Check3{"Expression valid?"}
        Check4{"TSDB responds?"}
        
        Check1 -->|No| E1
        Check1 -->|Yes| Check2
        Check2 -->|No| E2
        Check2 -->|Yes| Check4
        Check4 -->|No| E4
        Check4 -->|Yes| Check3
        Check3 -->|No| E3
        Check3 -->|Yes| Success["Return health data"]
    end
    
    subgraph HTTPResponse["HTTP Response Mapping"]
        E1 --> R409a["409 Conflict"]
        E2 --> R409b["409 Conflict"]
        E3 --> R500a["500 Internal Server Error"]
        E4 --> R500b["500 Internal Server Error"]
        Success --> R200["200 OK"]
    end
    
    style Errors fill:#ffebee
    style HTTPResponse fill:#e8f5e9
Loading

Related Documentation