Skip to content

Commit d28f048

Browse files
Merge pull request #30 from hyperi-io/chore/merge-to-release
chore: merge main into release
2 parents 359aff7 + ad30afe commit d28f048

28 files changed

Lines changed: 4094 additions & 295 deletions

Cargo.toml

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ name = "hyperi-rustlib"
1111
version = "1.19.8"
1212
edition = "2024"
1313
rust-version = "1.94"
14-
description = "Shared utility library for HyperI Rust applications"
14+
description = "Opinionated Rust framework for high-throughput data pipelines at PB scale. Auto-wiring config, logging, metrics, tracing, health, and graceful shutdown — built from many years of production infrastructure experience."
1515
license = "FSL-1.1-ALv2"
1616
repository = "https://github.com/hyperi-io/hyperi-rustlib"
1717
publish = true
@@ -20,9 +20,11 @@ categories = ["development-tools"]
2020
exclude = [".claude/", ".github/", "ci/", "ai/", "docs/", "examples/", "benches/", "scripts/"]
2121

2222
[features]
23-
default = ["config", "logger", "metrics", "runtime"]
23+
default = ["config", "logger", "metrics", "runtime", "shutdown", "health"]
2424

2525
# Core features
26+
health = []
27+
shutdown = ["tokio", "tokio-util"]
2628
runtime = ["dirs"]
2729
config = ["figment", "dotenvy", "serde_yaml_ng", "serde_json", "toml", "dirs", "tracing"]
2830
logger = ["tracing", "tracing-subscriber", "owo-colors", "serde_json", "tracing-throttle"]
@@ -91,7 +93,11 @@ transport-memory = ["transport"]
9193
transport-kafka = ["transport", "rdkafka"]
9294
transport-grpc = ["transport", "dep:tonic", "dep:tonic-prost", "dep:prost", "dep:prost-types", "dep:tonic-prost-build", "dep:prost-build"]
9395
transport-grpc-vector-compat = ["transport-grpc"]
94-
transport-all = ["transport-memory", "transport-kafka", "transport-grpc"]
96+
transport-file = ["transport", "io"]
97+
transport-pipe = ["transport"]
98+
transport-http = ["transport", "http"]
99+
transport-redis = ["transport", "redis"]
100+
transport-all = ["transport-memory", "transport-kafka", "transport-grpc", "transport-file", "transport-pipe", "transport-http", "transport-redis"]
95101

96102
# Secrets management
97103
secrets = ["tokio", "serde_json", "async-trait", "parking_lot", "base64", "dirs", "tracing"]
@@ -100,7 +106,7 @@ secrets-aws = ["secrets", "aws-config", "aws-sdk-secretsmanager"]
100106
secrets-all = ["secrets-vault", "secrets-aws"]
101107

102108
# Full feature set
103-
full = ["config", "config-reload", "logger", "metrics", "metrics-dfe", "otel", "otel-metrics", "runtime", "http", "http-server", "spool", "tiered-sink", "resilience", "database", "cache", "transport-all", "transport-grpc-vector-compat", "secrets-all", "directory-config", "directory-config-git", "deployment", "version-check", "scaling", "memory", "cli", "io", "dlq", "dlq-kafka", "output-file", "expression"]
109+
full = ["config", "config-reload", "logger", "metrics", "metrics-dfe", "otel", "otel-metrics", "runtime", "shutdown", "health", "http", "http-server", "spool", "tiered-sink", "resilience", "database", "cache", "transport-all", "transport-grpc-vector-compat", "secrets-all", "directory-config", "directory-config-git", "deployment", "version-check", "scaling", "memory", "cli", "io", "dlq", "dlq-kafka", "output-file", "expression"]
104110

105111
[dependencies]
106112
# Serialisation (always needed)
@@ -154,8 +160,9 @@ metrics-util = { version = ">=0.20.1, <0.21", optional = true }
154160
metrics-exporter-opentelemetry = { version = ">=0.2.1, <0.3", optional = true }
155161
sysinfo = { version = ">=0.38.0, <0.39", optional = true }
156162

157-
# Async runtime (for metrics server, http-server)
158-
tokio = { version = ">=1.50.0, <2", features = ["rt-multi-thread", "net", "sync", "time", "macros", "signal", "fs"], optional = true }
163+
# Async runtime (for metrics server, http-server, shutdown)
164+
tokio = { version = ">=1.50.0, <2", features = ["rt-multi-thread", "net", "sync", "time", "macros", "signal", "fs", "io-std", "io-util"], optional = true }
165+
tokio-util = { version = ">=0.7.14, <0.8", optional = true }
159166

160167
# HTTP client — pinned to reqwest 0.12 until vaultrs and opentelemetry-otlp
161168
# support 0.13. reqwest-middleware 0.4 and reqwest-retry 0.7 target 0.12.
@@ -183,6 +190,9 @@ rmp-serde = { version = ">=1.3.1, <2", optional = true }
183190
# Kafka transport (dynamic-linking: use system librdkafka instead of compiling C++ from source)
184191
rdkafka = { version = ">=0.39.0, <0.40", features = ["dynamic-linking"], optional = true }
185192

193+
# Redis/Valkey Streams transport
194+
redis = { version = ">=1.0, <2", features = ["tokio-comp", "streams"], optional = true }
195+
186196
# gRPC transport (tonic + prost)
187197
tonic = { version = ">=0.14, <0.15", features = ["gzip"], optional = true }
188198
tonic-prost = { version = ">=0.14.5, <0.15", optional = true }

STATE.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,34 @@ CARGO_BUILD_JOBS=2 cargo clippy
108108

109109
---
110110

111+
## Core Pillars (Non-Negotiable Design Decision)
112+
113+
Every module in hyperi-rustlib MUST auto-integrate with the core infrastructure
114+
pillars using the **global singleton pattern**. Services get observability for
115+
free — no handles passed, no opt-in, no extra code in downstream apps.
116+
117+
| Pillar | Singleton | Module | Pattern |
118+
|--------|-----------|--------|---------|
119+
| Config | `OnceLock<Config>` | `config` | `T::from_cascade()` reads from global figment |
120+
| Logging | Global `tracing` subscriber | `logger` | `tracing::info!()` macros — always available |
121+
| Metrics | Global `metrics` recorder | `metrics` | `metrics::counter!()` macros — no-op if no recorder |
122+
| Tracing | Global OTel subscriber | `otel` | Trace context auto-propagated (planned: always-on) |
123+
| Health | Global `HealthState` | `http-server` | Unified readiness flag (planned: auto-wired) |
124+
| Shutdown | `CancellationToken` | (planned) | Unified graceful shutdown (planned: auto-wired) |
125+
126+
**Rule:** When adding ANY new module or feature to rustlib:
127+
1. If it has configurable behaviour → load from cascade via `from_cascade()`
128+
2. If it does I/O or processing → add `#[cfg(feature = "metrics")]` counters/gauges/histograms
129+
3. If it can fail or has interesting state → add `tracing::` log calls
130+
4. If it affects service health → report into unified `HealthState`
131+
132+
**The goal:** A DFE app that does `MetricsManager::new("dfe_loader")` +
133+
`logger::setup_default()` + `config::setup()` at startup gets full
134+
observability across every rustlib feature it uses — transport, tiered-sink,
135+
spool, cache, secrets, HTTP client, DLQ — with zero additional wiring.
136+
137+
---
138+
111139
## Decisions
112140

113141
- **Dynamic linking for C deps** — rdkafka, libgit2, zstd, zlib, openssl all link against system libs via pkg-config. Eliminates ~30min C++ build for rdkafka. aws-lc-sys is the one exception (AWS SDK hardcodes it, no opt-out).

TODO.md

Lines changed: 56 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -1,164 +1,92 @@
11
# TODO - hyperi-rustlib
22

3-
**Project Goal:** Rust shared library equivalent to hyperi-pylib (Python) and hyperi-golib (Go)
3+
**Project Goal:** Opinionated Rust framework for high-throughput data pipelines at PB scale
44

5-
**Target:** Production-ready library for HyperI Rust applications
5+
**Target:** Production-ready library with auto-wiring config, logging, metrics, tracing, health, and graceful shutdown
66

77
---
88

99
## Current Tasks
1010

11-
### Kafka Transport Metrics Parity with gRPC `[NEXT]`
11+
### v2.0.0 Release `[NEXT]`
1212

13-
gRPC transport (v1.19.7) auto-emits `dfe_transport_*` metrics. Kafka transport does not — two gaps:
13+
All core pillar work is done. Need to:
14+
- [ ] Release-merge to release branch (feat!: breaking change → v2.0.0)
15+
- [ ] Verify crates.io publication
16+
- [ ] Docs consolidation (TRANSPORT.md, CORE-PILLARS.md, per-feature docs)
17+
- [ ] Add Redis vs Kafka comparison table to transport docs
1418

15-
1. **Add metrics instrumentation to `KafkaTransport::send()`** — emit `dfe_transport_sent_total{transport="kafka"}`, `dfe_transport_send_errors_total{transport="kafka"}`, `dfe_transport_backpressured_total{transport="kafka"}`, and send duration histogram, matching gRPC parity.
19+
### DLQ Transport Integration
1620

17-
2. **Wire `StatsContext` into `KafkaTransport`** — currently `new_with_context()` is a stub that ignores the context. The struct uses `DefaultConsumerContext` / plain `FutureProducer`, so `statistics.interval.ms` callbacks (set to 1000ms by all profiles) go to a no-op. Need to either make the struct generic over context (complicates `Transport` trait impl) or use `StatsContext` by default when the `metrics` feature is enabled. This enables `rdkafka_broker_rtt_avg_seconds`, `rdkafka_global_msg_cnt`, `rdkafka_topic_partition_consumer_lag`, etc.
21+
- [ ] DLQ Kafka backend uses `Box<dyn TransportSender>` / `AnySender` instead of raw producer
22+
- [ ] DLQ can write to any transport (file, HTTP, Redis, Kafka)
1823

19-
Downstream impact: dfe-fetcher, dfe-receiver, dfe-loader all use `KafkaTransport` and would get these for free once wired.
24+
### Identity / Auth Module (Discussion)
2025

21-
### Gap Analysis P2 — HTTP Client, Database URLs, Cache
26+
- [ ] Token validation middleware (JWT/OIDC) for gRPC interceptor + axum middleware
27+
- [ ] Service identity (service name + instance ID for mTLS, audit logs)
28+
- [ ] Break-glass: static bearer token from secrets module
29+
- [ ] Design decision: dfe-engine as SSoT, rustlib validates tokens only
2230

23-
- [ ] HTTP client module with retry middleware (reqwest + reqwest-middleware + reqwest-retry)
24-
- Wrap reqwest with exponential backoff, configurable timeouts
25-
- Auto-register config via `unmarshal_key_registered`
26-
- Metrics integration (request count, duration, errors)
27-
- [ ] Database URL builders (PostgreSQL, ClickHouse, Redis)
28-
- Build connection strings from env vars with standard prefixes
29-
- `SensitiveString` for password fields
30-
- [ ] Cache module with disk/memory backends
31-
- Consolidate secrets cache pattern into reusable module
32-
- TTL, stale-while-revalidate, size bounds
31+
### Downstream Remediation
3332

34-
### Completed Recent
35-
36-
- [x] **Config registry** (v1.19.3-v1.19.5) — auto-registering reflectable config, `/config` admin endpoint, `SensitiveString`, heuristic redaction, change notification, `ConfigReloader` hook
37-
- [x] **CEL expression profile** (v1.19.2) — `matches()` blocked by default, `ProfileConfig` with per-category overrides, string literal false-positive prevention
38-
- [x] **Config cascade wiring** (v1.19.2) — expression, memory, version_check, scaling, grpc, secrets auto-read from figment cascade
39-
- [x] **MemoryGuard underflow fix** (v1.19.1) — `fetch_sub` replaced with `fetch_update` + `saturating_sub`
40-
- [x] **Test restructure** (v1.19.1) — `tests/integration/`, `tests/e2e/`, `tests/common/` per testing standard
41-
- [x] **hyperi-ci release-merge** — CLI command replaces per-project workflow files
42-
- [x] **Rust edition 2024** — migrated from 2021; `temp-env` replaces unsafe `set_var`/`remove_var` in tests across 6 files
43-
- [x] **async-trait removal** — public traits (`Sink`, `Transport`, `SecretProvider`) now use `fn ... -> impl Future + Send` (Rust 1.75+ native)
44-
- [x] **kafka_config module**`config_from_file`, 7 named profiles, `merge_with_overrides`; librdkafka settings loaded from config git dir (only cascade exception)
45-
- [x] **File output sink**`src/io/`, `src/output/`, `output-file` feature
46-
- [x] **CLI module** — CommonArgs, StandardCommand, DfeApp trait (`cli` feature)
47-
- [x] **Top module** — ratatui TUI dashboard, Prometheus parser, oneshot mode (`top` feature)
48-
- [x] **CI gating fix** — Semantic Release now gated on CI success via workflow_run
33+
- [ ] Migrate dfe-loader to v2.0.0 (transport factory, remove boilerplate)
34+
- [ ] Migrate dfe-receiver to v2.0.0 (RoutedSender, transport factory)
35+
- [ ] Migrate dfe-archiver to v2.0.0
36+
- [ ] Migrate dfe-fetcher to v2.0.0
37+
- [ ] Migrate dfe-transform-wasm to v2.0.0
38+
- [ ] Migrate dfe-transform-vrl to v2.0.0
39+
- [ ] Audit hyperi-pylib and write alignment plan
4940

5041
---
5142

52-
## Completed
53-
54-
- [x] Vector compat integration tests — 6 tests using real Vector binary + VectorCompatClient (fetch-vector.sh + YAML)
55-
- [x] vault_env integration tests fixed — clear_vault_env() prevents VAULT_TOKEN leakage
56-
- [x] Dependency update sweep — all crates to latest, tonic/prost 0.14 migration (v1.8.4)
57-
- [x] Stale hs-rustlib removed from JFrog hypersec-cargo-local and hyperi-cargo-local
58-
- [x] MaskingLayer fixed — writer-based redaction for both JSON and text formats (v1.8.4)
59-
- [x] Logger output capturing tests — 10 tests (JSON, text, filtering, masking)
60-
- [x] Coloured log output — custom FormatEvent with owo-colors colour scheme
61-
- [x] Metrics graceful shutdown tests — 4 tests (shutdown, rapid cycle, render after stop, concurrent)
62-
- [x] gRPC transport integration tests — 8 tests (send/recv, ordering, large payload, compression)
63-
- [x] gRPC transport with Vector wire protocol compatibility (v1.8.0)
64-
- tonic-based gRPC replacing Zenoh transport
65-
- DFE native proto (`dfe.transport.v1`) + vendored Vector proto
66-
- Vector compat source/sink for migration from Vector pipelines
67-
- build.rs for conditional proto code generation
68-
- [x] Zenoh transport removed — replaced by gRPC (v1.8.0)
69-
- [x] Version check module — startup check against releases.hyperi.io (v1.7.0)
70-
- [x] Deployment validation module — Helm chart and Dockerfile contract checks (v1.7.0)
71-
- [x] CI: ARC self-hosted runners enabled (v1.7.1–v1.8.3)
72-
- [x] Clippy/formatting fixes — approx_constant lint, dprint float formatting (v1.8.1–v1.8.3)
73-
- [x] Package rename: hs-rustlib -> hyperi-rustlib, published v1.4.3 to JFrog
74-
- [x] Rebrand: HyperSec -> HyperI across source, docs, configs, workflows
75-
- [x] Registry migration: hypersec registry -> hyperi registry
76-
- [x] Submodule URLs: hypersec-io -> hyperi-io
77-
- [x] CI config: .hypersec-ci.yaml -> .hyperi-ci.yaml
78-
- [x] Directory-config store with git2 integration (v1.4.0)
79-
- [x] OpenTelemetry metrics support (v1.4.0)
80-
- [x] Secrets management module (OpenBao/Vault, AWS) (v1.3.x)
81-
- [x] HTTP server module (axum-based) (v1.2.0)
82-
- [x] Transport module (Kafka/Memory abstraction)
83-
- [x] TieredSink module (disk spillover with circuit breaker)
84-
- [x] Spool module (disk-backed queue)
85-
- [x] Configuration module (7-layer cascade with figment)
86-
- [x] Logger module (structured JSON, RFC3339, masking)
87-
- [x] Metrics module (Prometheus + process/container)
88-
- [x] Environment detection module
89-
- [x] Runtime paths module (XDG + container awareness)
90-
- [x] Dependency audit (serde_yml -> serde-yaml-ng, queue-file -> yaque, once_cell -> LazyLock)
91-
- [x] Config cascade alignment with hyperi-pylib unified spec (v1.6.0)
92-
- load_home_dotenv default false, app_name support, container/user config paths
93-
- Created docs/CONFIG-CASCADE.md
94-
- PG layer documented as built-for-not-with (YAML gitops covers current needs)
43+
### Completed This Session
44+
45+
- [x] **Transport trait split**`Transport` split into `TransportBase` + `TransportSender` + `TransportReceiver` with blanket `Transport` impl
46+
- [x] **Transport factory**`AnySender` enum dispatch from config, `RoutedSender` for per-key dispatch (receiver/fetcher only)
47+
- [x] **File transport** — NDJSON with position tracking, commit persistence, rotation
48+
- [x] **Pipe transport** — stdin/stdout for Unix pipeline composition
49+
- [x] **HTTP transport** — POST send + embedded axum receive (bidirectional)
50+
- [x] **Redis/Valkey Streams transport** — XADD/XREADGROUP/XACK with consumer groups
51+
- [x] **HealthRegistry** — global singleton, modules auto-register health check closures, `/readyz` aggregates, `/health/detailed` JSON
52+
- [x] **Shutdown manager** — global CancellationToken, SIGTERM/SIGINT handler, modules listen on token
53+
- [x] **OTel trace propagation** — W3C traceparent auto-injected/extracted in gRPC, Kafka, HTTP transports
54+
- [x] **Universal metrics** — all modules auto-emit Prometheus metrics via global recorder
55+
- [x] **Logging + config cascade** — added to all new transports
56+
- [x] **Health wiring** — Kafka, gRPC, CircuitBreaker, HttpServer auto-register
57+
- [x] **Shutdown wiring** — HttpServer, TieredSink drainer, ConfigReloader listen on global token
58+
- [x] **KafkaTransport StatsContext** — always-on, rdkafka_* metrics auto-emitted
59+
- [x] **gRPC transport metrics** — dfe_transport_* parity with Kafka
60+
- [x] **HTTP client, database URL builders, cache modules** (v1.19.6)
61+
- [x] **Config registry, SensitiveString, /config endpoint** (v1.19.3-v1.19.5)
62+
- [x] **Dependency updates, cargo-audit ignores** (v1.19.6)
9563

9664
---
9765

98-
## Backlog (P1 - Config Registry)
99-
100-
### Reflectable Config Registry
101-
102-
Central registry where every module registers its config section at startup.
103-
Currently modules independently call `unmarshal_key()` — no visibility into
104-
what config keys exist, their types, defaults, or descriptions.
105-
106-
**Goal:** Any DFE app can list/dump/expose all available config sections.
107-
108-
- [x] Auto-registration via `unmarshal_key_registered` — records `(key, type_name, defaults, effective)` in global registry. Zero code changes in downstream apps.
109-
- [x] `registry::sections()` — list all registered sections
110-
- [x] `registry::dump_effective()` — JSON map of effective values
111-
- [x] `registry::dump_defaults()` — JSON map of defaults (via `T::default()`)
112-
- [x] Heuristic auto-redaction (password, secret, token, key, credential, auth, private, cert, encryption)
113-
- [x] `#[serde(skip_serializing)]` as additional layer for fields that should never appear
114-
- [x] expression, memory, version_check, scaling, grpc, secrets wired with `from_cascade()` auto-register
115-
- [x] Modules without defaults (tiered_sink, http_server, kafka, spool, dlq) use `unmarshal_key_registered` from downstream apps
116-
- [x] `/config` admin endpoint (opt-in via `enable_config_endpoint`) — returns redacted effective + defaults JSON
117-
- [x] Change notification (opt-in) — `registry::on_change(key, callback)` + `registry::update()`
118-
- Modules that need hot-reload subscribe; others keep `OnceLock` (init-once)
119-
- [x] `ConfigReloader.with_registry_update(key)` connects hot-reload to registry
120-
- [x] `SensitiveString` type — compile-time safe, `Serialize` always redacts
121-
- [x] 19 registry + 12 sensitive string tests covering all redaction guarantees
122-
- [ ] Migrate all dfe-* and hyperi-* apps to `unmarshal_key_registered` pattern
123-
- [ ] Align hyperi-pylib with same registry pattern
124-
125-
---
126-
127-
## Backlog (P2 - from Gap Analysis)
128-
129-
### Phase 1 - Core Enterprise
130-
131-
- [ ] Database URL builders module (PostgreSQL, Redis)
132-
- [ ] HTTP client module with retry middleware (reqwest-retry)
66+
## Backlog
13367

13468
### Secrets Providers
13569

136-
- [ ] GCP Secret Manager provider (`secrets-gcp` feature, `google-cloud-secretmanager` crate)
137-
- [ ] Azure Key Vault provider (`secrets-azure` feature, `azure_security_keyvault` crate)
70+
- [ ] GCP Secret Manager provider (`secrets-gcp` feature)
71+
- [ ] Azure Key Vault provider (`secrets-azure` feature)
13872

139-
### Phase 2 - Enhanced Features
73+
### Kafka — Opinionated SASL-SCRAM Named Constructors
14074

141-
- [ ] Cache module with disk/Redis backing
142-
- [ ] CLI framework helpers (wrap Clap)
75+
- [ ] `KafkaConfig::external_sasl_scram(brokers, username, password)`
76+
- [ ] `KafkaConfig::internal_sasl_scram(brokers, username, password)`
14377

144-
### Phase 3 - Advanced
78+
### Other
14579

146-
- [ ] Standalone Kafka client (if transport layer insufficient)
14780
- [ ] PII anonymiser (evaluate Rust libraries)
14881
- [ ] Python bindings for ClickHouse client (PyO3)
14982

150-
### Kafka — Opinionated SASL-SCRAM Named Constructors
151-
152-
- [ ] Add `KafkaConfig::external_sasl_scram(brokers, username, password)` — SASL_SSL + SCRAM-SHA-512
153-
- [ ] Add `KafkaConfig::internal_sasl_scram(brokers, username, password)` — SASL_PLAINTEXT + SCRAM-SHA-512
154-
- [ ] Encodes the decision once: SCRAM works unchanged on Apache Kafka, AutoMQ, MSK, Confluent Cloud
155-
- [ ] Remove per-project manual assembly of protocol + sasl + tls fields in dfe-loader, dfe-receiver
156-
15783
---
15884

15985
## Notes
16086

16187
- Use `CARGO_BUILD_JOBS=2` for all cargo commands
162-
- Transport backends: Kafka, gRPC (native + Vector compat), Memory (Zenoh removed in v1.8.0)
163-
- See docs/GAP_ANALYSIS.md for detailed comparison with hyperi-pylib
164-
- See docs/CLICKHOUSE_PYTHON_BINDINGS.md for Python binding proposal
88+
- Transport backends: Kafka, gRPC, Memory, File, Pipe, HTTP, Redis/Valkey
89+
- Core pillars plan: `docs/superpowers/plans/2026-03-26-core-pillars.md`
90+
- Two deployment modes: Kafka-mediated (persistence) vs direct gRPC (low latency)
91+
- Routed transport is receiver/fetcher only — all other stages are 1:1
92+
- Breaking change: `feat!:` commit triggers v2.0.0 via semantic-release

0 commit comments

Comments
 (0)