InfluxDB wide format, v0.2.0

haoruizhou · haoruizhou · commit 22b72c2e85ff · 2026-03-16T00:33:02.000-04:00
Update pyproject.toml
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "slicks"
-version = "0.1.5"
+version = "0.2.0"
 description = "The home baked data pipeline for Western Formula Racing"
 readme = "README.md"
 authors = [
@@ -19,6 +19,8 @@ requires-python = ">=3.11"
 dependencies = [
     "pandas>=2.0.0",
     "influxdb3-python>=0.1.0",
+    "influxdb-client>=1.30.0",
+    "cantools>=39.0.0",
     "python-dotenv>=1.0.0",
     "matplotlib>=3.0.0",
     "tqdm>=4.0.0",
diff --git a/src/slicks/__init__.py b/src/slicks/__init__.py
@@ -3,6 +3,8 @@
 from .movement_detector import detect_movement_ratio, get_movement_segments, filter_data_in_movement
 from .config import connect_influxdb3
 from .scanner import scan_data_availability
+from .can_decode import DecodedFrame, decode_frame, load_dbc, resolve_dbc_path
+from .writer import WideWriter, frame_to_line_protocol, NON_SIGNAL_COLS
 
 # New analysis modules
 from . import battery
diff --git a/src/slicks/can_decode.py b/src/slicks/can_decode.py
@@ -0,0 +1,87 @@
+"""CAN frame decoding — shared logic for all telemetry writers."""
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+import cantools
+
+
+@dataclass
+class DecodedFrame:
+    message_name: str
+    can_id: int
+    signals: dict[str, float] = field(default_factory=dict)
+
+
+def resolve_dbc_path(env_var: str = "DBC_FILE_PATH", fallback: str = "example.dbc") -> Path:
+    """Resolve the DBC path: env var (or ``fallback``), known install locations, then the newest ``*.dbc`` in the current directory; raise FileNotFoundError if none exist."""
+    env_val = os.getenv(env_var, fallback)
+    env_path = Path(env_val)
+    if env_path.exists():
+        return env_path
+
+    candidates = [
+        Path("/app/example.dbc"),
+        Path("/installer/example.dbc"),
+        Path(__file__).parent.parent.parent / "example.dbc",
+    ]
+    for c in candidates:
+        if c.exists():
+            return c
+
+    # Try newest .dbc in current directory
+    dbcs = sorted(Path(".").glob("*.dbc"), key=lambda p: p.stat().st_mtime, reverse=True)
+    if dbcs:
+        return dbcs[0]
+
+    raise FileNotFoundError(
+        f"Could not find DBC file. Set {env_var} or place example.dbc in /app/."
+    )
+
+
+def load_dbc(path: Optional[Path] = None) -> cantools.Database:
+    """Load a cantools DBC database, resolving path if not provided."""
+    if path is None:
+        path = resolve_dbc_path()
+    return cantools.database.load_file(str(path))
+
+
+def decode_frame(db: cantools.Database, can_id: int, data: bytes) -> Optional[DecodedFrame]:
+    """
+    Decode a CAN frame using a loaded DBC database.
+
+    Handles:
+    - Extended CAN IDs (bits 29-31 masked off with 0x1FFFFFFF; lookup uses the low 29 bits)
+    - NamedSignalValue enums (converted to float)
+    - Non-numeric signal values (skipped)
+
+    Returns DecodedFrame with only numeric signals, or None if the CAN ID is not in the DBC or the payload fails to decode.
+    """
+    effective_id = can_id & 0x1FFFFFFF  # Strip extended CAN ID flag
+
+    try:
+        message = db.get_message_by_frame_id(effective_id)
+    except KeyError:
+        return None
+
+    try:
+        raw = message.decode(data)
+    except Exception:
+        return None
+
+    signals: dict[str, float] = {}
+    for name, val in raw.items():
+        if hasattr(val, "value") and hasattr(val, "name"):
+            # NamedSignalValue enum from cantools
+            try:
+                signals[name] = float(val.value)
+            except (ValueError, TypeError):
+                continue
+        elif isinstance(val, (int, float)):
+            signals[name] = float(val)
+        # else: skip non-numeric values
+
+    return DecodedFrame(message_name=message.name, can_id=can_id, signals=signals)
diff --git a/src/slicks/discovery.py b/src/slicks/discovery.py
@@ -2,7 +2,8 @@
 Sensor discovery module.
 
 Scans the database for all unique sensor names within a time range.
-Uses adaptive chunking with parallel execution.
+Uses adaptive chunking with parallel execution for narrow schema.
+For wide schema, uses instant information_schema.columns metadata lookup.
 """
 
 from __future__ import annotations
@@ -16,6 +17,7 @@
 
 from . import config
 from .query_utils import adaptive_query, run_chunks_parallel, PermanentQueryError, quote_table
+from .writer import NON_SIGNAL_COLS
 
 
 def discover_sensors(
@@ -24,23 +26,53 @@ def discover_sensors(
     chunk_size_days: int = 7,
     client: Optional[InfluxDBClient3] = None,
     show_progress: bool = True,
+    schema: str = "narrow",
 ) -> List[str]:
     """
     Scan the database for ALL unique sensor names within the time range.
 
-    Uses adaptive chunking with parallel execution to handle server
-    resource limits efficiently.
+    For ``schema="wide"``, uses an instant ``information_schema.columns`` metadata
+    lookup (no data scan, no adaptive bisection, ignores time range and chunk params).
+
+    For ``schema="narrow"`` (default), uses adaptive chunking with parallel execution
+    to handle server resource limits efficiently.
 
     Args:
-        start_time: Start of scan range.
-        end_time: End of scan range.
-        chunk_size_days: Days per chunk (default 7).
+        start_time: Start of scan range (narrow schema only).
+        end_time: End of scan range (narrow schema only).
+        chunk_size_days: Days per chunk (default 7, narrow schema only).
         client: Ignored (kept for backward compatibility).
-        show_progress: Show progress bar (default True).
+        show_progress: Show progress bar (default True, narrow schema only).
+        schema: "narrow" (legacy EAV) or "wide" (one field per signal). Any value other than "wide" takes the narrow path.
 
     Returns:
         Sorted list of unique sensor name strings.
     """
+    db_schema = config.INFLUX_SCHEMA or "iox"
+    table = config.INFLUX_TABLE or config.INFLUX_DB
+
+    if schema == "wide":
+        # Instant metadata lookup — no data scan needed
+        cli = InfluxDBClient3(
+            host=config.INFLUX_URL,
+            token=config.INFLUX_TOKEN,
+            database=config.INFLUX_DB,
+        )
+        sql = (
+            f"SELECT column_name FROM information_schema.columns "
+            f"WHERE table_schema = '{db_schema}' AND table_name = '{table}'"
+        )
+        result = cli.query(query=sql)
+        if result.num_rows == 0:
+            return []
+        col = result.column("column_name")
+        return sorted(
+            v.as_py()
+            for v in col
+            if v.as_py() is not None and v.as_py() not in NON_SIGNAL_COLS
+        )
+
+    # --- narrow (legacy EAV) path ---
 
     def _make_client() -> InfluxDBClient3:
         return InfluxDBClient3(
@@ -52,21 +84,17 @@ def _make_client() -> InfluxDBClient3:
     def _query_distinct(
         client: InfluxDBClient3, t0: datetime, t1: datetime,
     ) -> List[str]:
-        # Ensure safe defaults if config vars are missing or empty
-        schema = config.INFLUX_SCHEMA or "iox"
-        table = config.INFLUX_TABLE or config.INFLUX_DB
-        table_ref = quote_table(schema, table)
-        
+        table_ref = quote_table(db_schema, table)
         sql = f"""
         SELECT DISTINCT "signalName"
         FROM {table_ref}
         WHERE time >= '{t0.isoformat()}Z'
         AND time < '{t1.isoformat()}Z'
         """
-        table = client.query(query=sql)
-        if table.num_rows == 0:
+        tbl = client.query(query=sql)
+        if tbl.num_rows == 0:
             return []
-        col = table.column("signalName")
+        col = tbl.column("signalName")
         return [v.as_py() for v in col if v.as_py() is not None]
 
     def _process_chunk(
diff --git a/src/slicks/fetcher.py b/src/slicks/fetcher.py
diff --git a/src/slicks/writer.py b/src/slicks/writer.py