diff --git a/CHANGELOG.md b/CHANGELOG.md index c3f44210..19721055 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,40 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [8.0.36] - 2026-05-07 + +### Changed + +- **`crates/thetadatadx/src/decode.rs` (2177 LoC) split into 7 modules** + under `mdds/decode/{error,headers,transport,extract,cell,v3}`. Pure + structural refactor; public API unchanged via `mdds::decode::*` re-exports. +- **Eastern-time + DST primitives lifted to `tdbe::time`.** + `eastern_offset_ms`, `march_second_sunday_utc`, `november_first_sunday_utc`, + `april_first_sunday_utc`, `october_last_sunday_utc`, `civil_to_epoch_days`, + `timestamp_to_ms_of_day`, `timestamp_to_date` — single canonical module + reused by mdds, fpss, flatfiles. tdbe 0.12.9 → 0.12.10. +- **`crates/thetadatadx/src/fpss/protocol.rs` (1613 LoC) split into 4 modules** + under `fpss/protocol/`. `mod.rs` keeps constants and re-exports; + `contract.rs` holds `Contract` + 6 constructors + `Display` + `FromStr` + + OCC-21 parser; `wire.rs` holds payload builders / parsers; `subscription.rs` + holds `SubscriptionKind`. +- **`crates/thetadatadx/src/config.rs` (1396 LoC, 30 flat fields) refactored + into 7 nested typed sub-configs.** `DirectConfig` now contains `mdds`, + `fpss`, `reconnect`, `retry`, `auth`, `metrics`, `runtime`. Field-read + accessors preserved on `DirectConfig` for back-compat (`config.mdds_host()` + etc still work). Field-write callers must migrate to nested form + (`config.fpss.queue_depth = ...`). Adds `mdds.connect_timeout_secs` + (default 10s, covers prior LOW finding). +- **`crates/tdbe/src/conditions.rs` (2749 LoC) refactored to TOML-driven + codegen.** Source-of-truth at `crates/tdbe/data/{trade,quote}_conditions.toml` + (149 + 75 entries). 
`crates/tdbe/build.rs` reads the TOMLs and emits + `crates/tdbe/src/conditions/tables_generated.rs` with compile-time + const arrays. Public surface unchanged; new `condition_tables_pin` + test pins 12 known entries against the const arrays for round-trip + protection. + + Refs #500. + ## [8.0.35] - 2026-05-07 ### Documentation diff --git a/Cargo.lock b/Cargo.lock index bf1313f5..8fed2163 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3075,11 +3075,13 @@ dependencies = [ [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ "criterion", + "serde", "sonic-rs", "thiserror 2.0.18", + "toml", ] [[package]] @@ -3097,7 +3099,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.35" +version = "8.0.36" dependencies = [ "arrow-array", "arrow-schema", @@ -3137,7 +3139,7 @@ dependencies = [ [[package]] name = "thetadatadx-cli" -version = "8.0.35" +version = "8.0.36" dependencies = [ "clap", "comfy-table", @@ -3149,7 +3151,7 @@ dependencies = [ [[package]] name = "thetadatadx-ffi" -version = "8.0.35" +version = "8.0.36" dependencies = [ "tdbe", "thetadatadx", diff --git a/crates/tdbe/Cargo.toml b/crates/tdbe/Cargo.toml index d56e73b3..abaf4a66 100644 --- a/crates/tdbe/Cargo.toml +++ b/crates/tdbe/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tdbe" -version = "0.12.9" +version = "0.12.10" edition.workspace = true rust-version.workspace = true authors.workspace = true @@ -11,6 +11,14 @@ repository.workspace = true license.workspace = true keywords = ["thetadata", "encoding", "market-data", "codec", "greeks"] categories = ["encoding", "finance"] +build = "build.rs" +include = [ + "src/**/*.rs", + "data/*.toml", + "build.rs", + "Cargo.toml", + "README.md", +] [lints] workspace = true @@ -19,6 +27,13 @@ workspace = true sonic-rs = "0.5.8" thiserror = "2.0.18" +[build-dependencies] +# Build-time only: parses `data/{trade,quote}_conditions.toml` into the +# committed `src/conditions/tables_generated.rs`. 
Does not ship to +# runtime consumers -- `tdbe`'s runtime dep graph is unchanged. +serde = { version = "1.0", features = ["derive"] } +toml = "1.1" + [dev-dependencies] criterion = { version = "0.8.2", features = ["html_reports"] } diff --git a/crates/tdbe/build.rs b/crates/tdbe/build.rs new file mode 100644 index 00000000..a2e074d7 --- /dev/null +++ b/crates/tdbe/build.rs @@ -0,0 +1,186 @@ +//! Build-time codegen for trade and quote condition tables. +//! +//! Reads `data/trade_conditions.toml` (149 entries) and +//! `data/quote_conditions.toml` (75 entries) and emits +//! `src/conditions/tables_generated.rs` containing the +//! `TRADE_CONDITIONS` and `QUOTE_CONDITIONS` const arrays. +//! +//! The generated file is committed so downstream consumers building +//! `tdbe` from crates.io don't have to re-run codegen unless they edit +//! the TOML source-of-truth files locally. + +use std::env; +use std::fs; +use std::path::PathBuf; + +use serde::Deserialize; +use toml::Value; + +#[derive(Deserialize)] +struct TradeFile { + trade: Vec, +} + +#[derive(Deserialize)] +struct QuoteFile { + quote: Vec, +} + +#[derive(Deserialize)] +struct TradeRow { + code: i32, + name: String, + description: String, + cancel: bool, + late_report: bool, + auto_executed: bool, + open_report: bool, + volume: bool, + high: bool, + low: bool, + last: bool, +} + +#[derive(Deserialize)] +struct QuoteRow { + code: i32, + name: String, + description: String, + firm: bool, + halted: bool, +} + +fn rust_string_literal(s: &str) -> String { + let mut out = String::with_capacity(s.len() + 2); + out.push('"'); + for ch in s.chars() { + match ch { + '\\' => out.push_str("\\\\"), + '"' => out.push_str("\\\""), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + c if (c as u32) < 0x20 => { + use std::fmt::Write; + let _ = write!(out, "\\u{{{:x}}}", c as u32); + } + c => out.push(c), + } + } + out.push('"'); + out +} + +fn main() { + let manifest_dir = 
PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + let trade_toml = manifest_dir.join("data/trade_conditions.toml"); + let quote_toml = manifest_dir.join("data/quote_conditions.toml"); + let out = manifest_dir.join("src/conditions/tables_generated.rs"); + + println!("cargo:rerun-if-changed=data/trade_conditions.toml"); + println!("cargo:rerun-if-changed=data/quote_conditions.toml"); + println!("cargo:rerun-if-changed=build.rs"); + + let trade_src = fs::read_to_string(&trade_toml) + .unwrap_or_else(|e| panic!("read {}: {e}", trade_toml.display())); + let quote_src = fs::read_to_string(&quote_toml) + .unwrap_or_else(|e| panic!("read {}: {e}", quote_toml.display())); + + // Parse via `toml::Value` first to give precise spec errors, then re-deserialize + // into typed rows. + let _: Value = toml::from_str(&trade_src).expect("trade_conditions.toml: invalid TOML"); + let _: Value = toml::from_str(&quote_src).expect("quote_conditions.toml: invalid TOML"); + + let trades: TradeFile = + toml::from_str(&trade_src).expect("trade_conditions.toml: schema mismatch"); + let quotes: QuoteFile = + toml::from_str(&quote_src).expect("quote_conditions.toml: schema mismatch"); + + assert_eq!( + trades.trade.len(), + 149, + "trade_conditions.toml must have exactly 149 entries" + ); + assert_eq!( + quotes.quote.len(), + 75, + "quote_conditions.toml must have exactly 75 entries" + ); + for (i, t) in trades.trade.iter().enumerate() { + assert_eq!( + t.code as usize, i, + "trade_conditions.toml[{i}] has code {} (must equal index)", + t.code + ); + } + for (i, q) in quotes.quote.iter().enumerate() { + assert_eq!( + q.code as usize, i, + "quote_conditions.toml[{i}] has code {} (must equal index)", + q.code + ); + } + + let mut s = String::new(); + s.push_str("// @generated DO NOT EDIT — regenerated by build.rs from data/*.toml\n"); + s.push_str("//\n"); + s.push_str("// Source-of-truth: crates/tdbe/data/trade_conditions.toml\n"); + s.push_str("// crates/tdbe/data/quote_conditions.toml\n\n"); + 
s.push_str("use super::{QuoteCondition, TradeCondition};\n\n"); + + s.push_str("/// All 149 trade condition codes (0..148).\n"); + s.push_str("pub const TRADE_CONDITIONS: [TradeCondition; 149] = [\n"); + for t in &trades.trade { + s.push_str(" TradeCondition {\n"); + s.push_str(&format!(" code: {},\n", t.code)); + s.push_str(&format!( + " name: {},\n", + rust_string_literal(&t.name) + )); + s.push_str(&format!( + " description: {},\n", + rust_string_literal(&t.description) + )); + s.push_str(&format!(" cancel: {},\n", t.cancel)); + s.push_str(&format!(" late_report: {},\n", t.late_report)); + s.push_str(&format!(" auto_executed: {},\n", t.auto_executed)); + s.push_str(&format!(" open_report: {},\n", t.open_report)); + s.push_str(&format!(" volume: {},\n", t.volume)); + s.push_str(&format!(" high: {},\n", t.high)); + s.push_str(&format!(" low: {},\n", t.low)); + s.push_str(&format!(" last: {},\n", t.last)); + s.push_str(" },\n"); + } + s.push_str("];\n\n"); + + s.push_str("/// All 75 quote condition codes (0..74).\n"); + s.push_str("pub const QUOTE_CONDITIONS: [QuoteCondition; 75] = [\n"); + for q in &quotes.quote { + s.push_str(" QuoteCondition {\n"); + s.push_str(&format!(" code: {},\n", q.code)); + s.push_str(&format!( + " name: {},\n", + rust_string_literal(&q.name) + )); + s.push_str(&format!( + " description: {},\n", + rust_string_literal(&q.description) + )); + s.push_str(&format!(" firm: {},\n", q.firm)); + s.push_str(&format!(" halted: {},\n", q.halted)); + s.push_str(" },\n"); + } + s.push_str("];\n"); + + // Write only if changed, to avoid touching mtime and triggering downstream rebuilds. 
+ let needs_write = match fs::read_to_string(&out) { + Ok(existing) => existing != s, + Err(_) => true, + }; + if needs_write { + if let Some(parent) = out.parent() { + fs::create_dir_all(parent).expect("create conditions dir"); + } + fs::write(&out, s).expect("write tables_generated.rs"); + } +} diff --git a/crates/tdbe/data/quote_conditions.toml b/crates/tdbe/data/quote_conditions.toml new file mode 100644 index 00000000..8b3c125c --- /dev/null +++ b/crates/tdbe/data/quote_conditions.toml @@ -0,0 +1,526 @@ +# @generated extracted from crates/tdbe/src/conditions.rs + +[[quote]] +code = 0 +name = "REGULAR" +description = "Regular two-sided quote" +firm = true +halted = false + +[[quote]] +code = 1 +name = "BID_ASK_AUTO_EXEC" +description = "Bid/Ask automatically executable" +firm = true +halted = false + +[[quote]] +code = 2 +name = "ROTATION" +description = "Market rotation (opening/closing)" +firm = false +halted = false + +[[quote]] +code = 3 +name = "SPECIALIST_ASK" +description = "Specialist ask side only" +firm = true +halted = false + +[[quote]] +code = 4 +name = "SPECIALIST_BID" +description = "Specialist bid side only" +firm = true +halted = false + +[[quote]] +code = 5 +name = "LOCKED" +description = "Bid equals ask across markets" +firm = true +halted = false + +[[quote]] +code = 6 +name = "FAST_MARKET" +description = "Fast market -- quotes may not reflect current state" +firm = false +halted = false + +[[quote]] +code = 7 +name = "SPECIALIST_BID_ASK" +description = "Specialist bid and ask" +firm = true +halted = false + +[[quote]] +code = 8 +name = "ONE_SIDE" +description = "One-sided quote (bid or ask only)" +firm = true +halted = false + +[[quote]] +code = 9 +name = "OPENING_QUOTE" +description = "Opening quote for the trading session" +firm = false +halted = false + +[[quote]] +code = 10 +name = "CLOSING_QUOTE" +description = "Closing quote for the trading session" +firm = false +halted = false + +[[quote]] +code = 11 +name = "MARKET_MAKER_CLOSED" 
+description = "Market maker has closed their quote" +firm = false +halted = false + +[[quote]] +code = 12 +name = "DEPTH_ON_ASK" +description = "Depth indicated on the ask side" +firm = true +halted = false + +[[quote]] +code = 13 +name = "DEPTH_ON_BID" +description = "Depth indicated on the bid side" +firm = true +halted = false + +[[quote]] +code = 14 +name = "DEPTH_ON_BID_ASK" +description = "Depth indicated on both bid and ask sides" +firm = true +halted = false + +[[quote]] +code = 15 +name = "TIER_3" +description = "Tier 3 quote (OTC Bulletin Board)" +firm = true +halted = false + +[[quote]] +code = 16 +name = "CROSSED" +description = "Bid exceeds ask across markets" +firm = true +halted = false + +[[quote]] +code = 17 +name = "HALTED" +description = "Trading halted" +firm = false +halted = true + +[[quote]] +code = 18 +name = "OPERATIONAL_HALT" +description = "Halt due to operational issues at the exchange" +firm = false +halted = true + +[[quote]] +code = 19 +name = "NEWS_OUT" +description = "Halted -- news has been released" +firm = false +halted = true + +[[quote]] +code = 20 +name = "NEWS_PENDING" +description = "Halted -- news is pending" +firm = false +halted = true + +[[quote]] +code = 21 +name = "NON_FIRM" +description = "Quote is non-firm (indicative only)" +firm = false +halted = false + +[[quote]] +code = 22 +name = "DUE_TO_RELATED" +description = "Halted due to related security" +firm = false +halted = true + +[[quote]] +code = 23 +name = "RESUME" +description = "Trading has resumed" +firm = false +halted = false + +[[quote]] +code = 24 +name = "NO_MARKET_MAKERS" +description = "No market makers in the security" +firm = false +halted = true + +[[quote]] +code = 25 +name = "ORDER_IMBALANCE" +description = "Halted due to order imbalance" +firm = false +halted = true + +[[quote]] +code = 26 +name = "ORDER_INFLUX" +description = "Halted due to large influx of orders" +firm = false +halted = true + +[[quote]] +code = 27 +name = "INDICATED" 
+description = "Halted -- indicated price only" +firm = false +halted = true + +[[quote]] +code = 28 +name = "PRE_OPEN" +description = "Pre-open period before regular trading" +firm = false +halted = false + +[[quote]] +code = 29 +name = "IN_VIEW_OF_COMMON" +description = "Halted in view of common stock trading" +firm = false +halted = true + +[[quote]] +code = 30 +name = "RELATED_NEWS_PENDING" +description = "Halted -- news pending on a related security" +firm = false +halted = true + +[[quote]] +code = 31 +name = "RELATED_NEWS_OUT" +description = "Halted -- news released on a related security" +firm = false +halted = true + +[[quote]] +code = 32 +name = "ADDITIONAL_INFO" +description = "Halted -- additional information requested by exchange" +firm = false +halted = true + +[[quote]] +code = 33 +name = "RELATED_ADD_INFO" +description = "Halted -- additional information requested for related security" +firm = false +halted = true + +[[quote]] +code = 34 +name = "NO_OPEN_RESUME" +description = "Halted -- no open/no resume" +firm = false +halted = true + +[[quote]] +code = 35 +name = "DELETED" +description = "Quote has been deleted" +firm = false +halted = true + +[[quote]] +code = 36 +name = "REGULATORY_HALT" +description = "Regulatory halt imposed" +firm = false +halted = true + +[[quote]] +code = 37 +name = "SEC_SUSPENSION" +description = "SEC trading suspension" +firm = false +halted = true + +[[quote]] +code = 38 +name = "NON_COMLIANCE" +description = "Non-compliance with listing requirements" +firm = false +halted = true + +[[quote]] +code = 39 +name = "FILINGS_NOT_CURRENT" +description = "Issuer filings not current" +firm = false +halted = true + +[[quote]] +code = 40 +name = "CATS_HALTED" +description = "CATS halted" +firm = false +halted = true + +[[quote]] +code = 41 +name = "CATS" +description = "CATS session" +firm = false +halted = false + +[[quote]] +code = 42 +name = "EX_DIV_OR_SPLIT" +description = "Ex-dividend or ex-split" +firm = true +halted = 
false + +[[quote]] +code = 43 +name = "UNASSIGNED" +description = "Unassigned condition code" +firm = false +halted = false + +[[quote]] +code = 44 +name = "INSIDE_OPEN" +description = "Inside market is open" +firm = false +halted = false + +[[quote]] +code = 45 +name = "INSIDE_CLOSED" +description = "Inside market is closed" +firm = false +halted = false + +[[quote]] +code = 46 +name = "OFFER_WANTED" +description = "Offer wanted -- seeking sellers" +firm = false +halted = false + +[[quote]] +code = 47 +name = "BID_WANTED" +description = "Bid wanted -- seeking buyers" +firm = false +halted = false + +[[quote]] +code = 48 +name = "CASH" +description = "Cash-only settlement" +firm = true +halted = false + +[[quote]] +code = 49 +name = "INACTIVE" +description = "Inactive security or market" +firm = true +halted = false + +[[quote]] +code = 50 +name = "NATIONAL_BBO" +description = "National best bid and offer" +firm = true +halted = false + +[[quote]] +code = 51 +name = "NOMINAL" +description = "Nominal (calculated) quote" +firm = true +halted = false + +[[quote]] +code = 52 +name = "CABINET" +description = "Cabinet trade quote (deep OTM options)" +firm = true +halted = false + +[[quote]] +code = 53 +name = "NOMINAL_CABINET" +description = "Nominal cabinet quote" +firm = true +halted = false + +[[quote]] +code = 54 +name = "BLANK_PRICE" +description = "Blank price -- no quote available" +firm = true +halted = false + +[[quote]] +code = 55 +name = "SLOW_BID_ASK" +description = "Slow quote on both bid and ask sides" +firm = false +halted = false + +[[quote]] +code = 56 +name = "SLOW_LIST" +description = "Slow quote due to LRP or gap condition" +firm = true +halted = false + +[[quote]] +code = 57 +name = "SLOW_BID" +description = "Slow quote on the bid side" +firm = false +halted = false + +[[quote]] +code = 58 +name = "SLOW_ASK" +description = "Slow quote on the ask side" +firm = false +halted = false + +[[quote]] +code = 59 +name = "BID_OFFER_WANTED" +description = 
"Both bid and offer wanted" +firm = false +halted = false + +[[quote]] +code = 60 +name = "SUBPENNY" +description = "Sub-penny trading increment" +firm = false +halted = false + +[[quote]] +code = 61 +name = "NON_BBO" +description = "Non-BBO quote (not best bid/offer)" +firm = false +halted = false + +[[quote]] +code = 62 +name = "SPECIAL_OPEN" +description = "Special opening quote" +firm = false +halted = false + +[[quote]] +code = 63 +name = "BENCHMARK" +description = "Benchmark quote" +firm = false +halted = false + +[[quote]] +code = 64 +name = "IMPLIED" +description = "Implied quote (derived from related instruments)" +firm = false +halted = false + +[[quote]] +code = 65 +name = "EXCHANGE_BEST" +description = "Exchange best bid/offer" +firm = false +halted = false + +[[quote]] +code = 66 +name = "MKT_WIDE_HALT_1" +description = "Market-wide circuit breaker halt -- Level 1" +firm = false +halted = true + +[[quote]] +code = 67 +name = "MKT_WIDE_HALT_2" +description = "Market-wide circuit breaker halt -- Level 2" +firm = false +halted = true + +[[quote]] +code = 68 +name = "MKT_WIDE_HALT_3" +description = "Market-wide circuit breaker halt -- Level 3" +firm = false +halted = true + +[[quote]] +code = 69 +name = "ON_DEMAND_AUCTION" +description = "On-demand auction" +firm = false +halted = false + +[[quote]] +code = 70 +name = "NON_FIRM_BID" +description = "Bid side is non-firm (indicative only)" +firm = false +halted = false + +[[quote]] +code = 71 +name = "NON_FIRM_ASK" +description = "Ask side is non-firm (indicative only)" +firm = false +halted = false + +[[quote]] +code = 72 +name = "RETAIL_BID" +description = "Retail interest on bid side" +firm = false +halted = false + +[[quote]] +code = 73 +name = "RETAIL_ASK" +description = "Retail interest on ask side" +firm = false +halted = false + +[[quote]] +code = 74 +name = "RETAIL_QTE" +description = "Retail interest on both sides" +firm = false +halted = false diff --git a/crates/tdbe/data/trade_conditions.toml 
b/crates/tdbe/data/trade_conditions.toml new file mode 100644 index 00000000..db07695a --- /dev/null +++ b/crates/tdbe/data/trade_conditions.toml @@ -0,0 +1,1938 @@ +# @generated extracted from crates/tdbe/src/conditions.rs + +[[trade]] +code = 0 +name = "REGULAR" +description = "Regular Trade" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 1 +name = "FORMT" +description = "Form T. Before and After Regular Hours. note: NYSE/AMEX previously used code 'T' for BurstBasket." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 2 +name = "OUTOFSEQ" +description = "Report was sent Out Of Sequence. Updates last if it becomes only trade (if the trade reports before it are canceled, for example)." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 3 +name = "AVGPRC" +description = "Average Price for a trade. NYSE/AMEX stocks. Nasdaq uses AvgPrc_Nasdaq-- main difference is NYSE/AMEX does not conditionally set high/low/last." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 4 +name = "AVGPRC_NASDAQ" +description = "Average Price. Nasdaq stocks. Similar to AvgPrc, but does not set high/low/last." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 5 +name = "OPENREPORTLATE" +description = "NYSE/AMEX. Market opened Late. Here is the report. It may not be in sequence. Nasdaq uses OpenReportOutOfSeq. *update last if only trade." 
+cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 6 +name = "OPENREPORTOUTOFSEQ" +description = "Report IS out of sequence. Market was open, and now this report is just getting to us." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = false + +[[trade]] +code = 7 +name = "OPENREPORTINSEQ" +description = "Opening report. This is the first price." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 8 +name = "PRIORREFERENCEPRICE" +description = "Trade references price established earlier. *Update last if this is the only trade report." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 9 +name = "NEXTDAYSALE" +description = "NYSE/AMEX:Next Day Clearing. Nasdaq: Delivery of Securities and payment one to four days later. * As of September 5, 2017, the NYSE will no longer accept orders with Cash, Next Day or Seller's Option instructions . [URL1: https://www.nyse.com/trader-update/history#110000073055]" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 10 +name = "BUNCHED" +description = "Aggregate of 2 or more Regular trades at same price within 60 seconds and each trade size not greater than 10,000." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 11 +name = "CASHSALE" +description = "Delivery of securities and payment on the same day. * As of September 5, 2017, the NYSE will no longer accept orders with Cash, Next Day or Seller's Option instructions . 
[URL1: https://www.nyse.com/trader-update/history#110000073055]" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 12 +name = "SELLER" +description = "Stock can be delivered up to 60 days later as specified by the seller. After 1995, the number of days can be greater than 60. note: delivery of 3 days would be considered a regular trade. * As of September 5, 2017, the NYSE will no longer accept orders with Cash, Next Day or Seller's Option instructions . [URL1: https://www.nyse.com/trader-update/history#110000073055]" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 13 +name = "SOLDLAST" +description = "Late Reporting. *Sets Consolidated Last if no other qualifying Last, or same Exchange set previous Trade, or Exchange is Listed Exchange." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 14 +name = "RULE127" +description = "NYSE only. Rule 127 basically denotes the trade was executed as a block trade." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 15 +name = "BUNCHEDSOLD" +description = "Several trades were bunched into one trade report, and the report is late. *Update last if this is first trade." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 16 +name = "NONBOARDLOT" +description = "Size of trade is less than a board lot (oddlot). A board lot is usually 1,00 shares. Note this is Canadian markets." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 17 +name = "POSIT" +description = "POSIT Canada is an electronic order matching system that prices trades at the mid-point of the bid and ask in the continuous market." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = false + +[[trade]] +code = 18 +name = "AUTOEXECUTION" +description = "Transaction executed electronically. Soley for information. Only found in OPRA -- options trades, and quite common." +cancel = false +late_report = false +auto_executed = true +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 19 +name = "HALT" +description = "Temporary halt in trading in a particular security for one or more participants." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 20 +name = "DELAYED" +description = "Indicates a delayed opening" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 21 +name = "REOPEN" +description = "Reopening of a contract that was previously halted." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 22 +name = "ACQUISITION" +description = "Transaction on exchange as a result of an Exchange Acquisition" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 23 +name = "CASHMARKET" +description = "Cash only Market. All trade reports for this session will be settled in cash. 
note: differs from CashSale in that the trade marked as CashSale is an exception -- that is, most trades are settled using regular conditions." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 24 +name = "NEXTDAYMARKET" +description = "Next Day Only Market. All trades reports for this session will be settled the next day. Note: differs from NextDay in that the trade marked as NextDay is an exception -- that is, most trades are settled using regular conditions." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 25 +name = "BURSTBASKET" +description = "Specialist bought or sold this stock as part of an execution of a specific basket of stocks." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 26 +name = "OPENDETAIL" +description = "107-113, 130, 160 Deleted an existing Sale Condition (Note: the code may be repurposed at a future date): 'G' - 'Opening/Reopening Trade Detail' This trade is one of several trades that made up the open report trade. Often the open report has a large size which was made up of orders placed overnight. After trading has commenced, the individual trades of the open report trade are sent with this condition. Note it doesn't update volume, high, low, or last because it's already been accounted for in the open report." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 27 +name = "INTRADETAIL" +description = "This trade is one of several trades that made up a previous trade. Similar to OpenDetail but refers to a trade report that was not the opening trade report." 
+cancel = false +late_report = true +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 28 +name = "BASKETONCLOSE" +description = "A trade consisting of a paired basket order to be executed based on the closing value of an index. These trades are reported after the close when the index closing value is known." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 29 +name = "RULE155" +description = "AMEX only rule 155. Sale of block at one clean-up price." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 30 +name = "DISTRIBUTION" +description = "Sale of a large block of stock in a way that price is not adversely affected." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 31 +name = "SPLIT" +description = "Execution in 2 markets when the specialist or MM in the market first receiving the order agrees to execute a portion of it at whatever price is realized in another market to which the balance of the order is forwarded for execution." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 32 +name = "REGULARSETTLE" +description = "RegularSettle" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 33 +name = "CUSTOMBASKETCROSS" +description = "One of two types: 2 paired but seperate orders in which a market maker or member facilitates both sides of a remaining portion of a basket. 
A split basket plus an entire basket where the market maker or member facilitates the remaining shares of the split basket." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 34 +name = "ADJTERMS" +description = "Terms have been adjusted to reflect stock split/dividend or similar event." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 35 +name = "SPREAD" +description = "Spread between 2 options in the same options class." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 36 +name = "STRADDLE" +description = "Straddle between 2 options in the same options class." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 37 +name = "BUYWRITE" +description = "This is the option part of a covered call." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 38 +name = "COMBO" +description = "A buy and a sell in 2 or more options in the same class." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 39 +name = "STPD" +description = "Traded at price agreed upon by the floor following a non-stopped trade of the same series at the same price." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 40 +name = "CANC" +description = "Cancel a previously reported trade - it will not be the first or last trade record. 
note: If the most recent report is Out of seq, SoldLast, or a type that does not qualify to set the last, that report can be considered in processing the cancel." +cancel = true +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 41 +name = "CANCLAST" +description = "Cancel the most recent trade report that is qualified to set the last." +cancel = true +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 42 +name = "CANCOPEN" +description = "Cancel the opening trade report." +cancel = true +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 43 +name = "CANCONLY" +description = "Cancel the only trade report. There is only one trade report, cancel it." +cancel = true +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 44 +name = "CANCSTPD" +description = "Cancel the trade report that has the condition STPD." +cancel = true +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 45 +name = "MATCHCROSS" +description = "CTS and UTP: Cross Trade A Cross Trade a trade transaction resulting from a market center's crossing session." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 46 +name = "FASTMARKET" +description = "Term used to define unusually hectic market conditions." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 47 +name = "NOMINAL" +description = "Nominal price. 
A calculated price primarily generated to represent the fair market value of an inactive instrument for the purpose of determining margin requirements and evaluating position risk. Common in futures and futures options." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 48 +name = "CABINET" +description = "A trade in a deep out-of-the-money option priced at one-half the tick value. Used by options traders to liquidate positions." +cancel = false +late_report = false +auto_executed = false +open_report = true +volume = false +high = false +low = false +last = false + +[[trade]] +code = 49 +name = "BLANKPRICE" +description = "Sent by an exchange to blank out the associated price (bid, ask or trade)." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 50 +name = "NOTSPECIFIED" +description = "An unspecified (generalized) condition." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 51 +name = "MCOFFICIALCLOSE" +description = "The Official closing value as determined by a Market Center." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 52 +name = "SPECIALTERMS" +description = "Indicates that all trades executed will be settled in other than the regular manner." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 53 +name = "CONTINGENTORDER" +description = "The result of an order placed by a Participating Organization on behalf of a client for one security and contingent on the execution of a second order placed by the same client for an offsetting volume of a related security." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 54 +name = "INTERNALCROSS" +description = "A cross between two client accounts of a Participating Organization which are managed by a single firm acting as portfolio manager with discretionary authority to manage the investment portfolio granted by each of the clients. This was originally from Toronto Stock Exchange (TSX). Information located here. [URL1: http://tmx.complinet.com/fr/display/display.html?rbid=2073&element_id=421&record_id=681&print=1]" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 55 +name = "STOPPEDREGULAR" +description = "Stopped Stock Regular Trade." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 56 +name = "STOPPEDSOLDLAST" +description = "Stopped Stock SoldLast Trade" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = true +low = true +last = true + +[[trade]] +code = 57 +name = "STOPPEDOUTOFSEQ" +description = "Stopped Stock -- Out of Sequence." 
+cancel = false +late_report = true +auto_executed = false +open_report = false +volume = false +high = true +low = true +last = false + +[[trade]] +code = 58 +name = "BASIS" +description = "A transaction involving a basket of securities or an index participation unit that is transacted at prices achieved through the execution of related exchange-traded derivative instruments, which may include index futures, index options and index participation units in an amount that will correspond to an equivalent market exposure." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 59 +name = "VWAP" +description = "Volume Weighted Average Price. A transaction for the purpose of executing trades at a volume-weighted average price of the security traded for a continuous period on or during a trading day on the exchange." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 60 +name = "SPECIALSESSION" +description = "Occurs when an order is placed by a purchase order on behalf of a client for execution in the Special Trading Session at the last sale price." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 61 +name = "PRICEVOLUMEADJ" +description = "Used to make volume and price corrections to match official exchange values." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 62 +name = "OPENREPORT" +description = "Indicates an opening trade report." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = false + +[[trade]] +code = 63 +name = "MARKETONCLOSE" +description = "The Official closing value as determined by a Market Center." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 64 +name = "SETTLEPRICE" +description = "Settlement Price" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 65 +name = "OUTOFSEQPREMKT" +description = "An out of sequence trade that executed in pre or post market -- a combination of FormT and OutOfSeq." +cancel = false +late_report = true +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 66 +name = "MCOFFICIALOPEN" +description = "Indicates the 'Official' opening value as determined by a Market Center. This transaction report will contain the market center generated opening price." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 67 +name = "FUTURESSPREAD" +description = "Execution was part of a spread with another futures contract." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 68 +name = "OPENRANGE" +description = "Two trade prices are used to indicate an opening range representing the high and low prices during the first 30 seconds or so of trading." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = true +low = true +last = false + +[[trade]] +code = 69 +name = "CLOSERANGE" +description = "Two trade prices are used to indicate a closing range representing the high and low prices during the last 30 seconds or so of trading." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = true +low = true +last = false + +[[trade]] +code = 70 +name = "NOMINALCABINET" +description = "Nominal Cabinet" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 71 +name = "CHANGINGTRANS" +description = "Changing Transaction" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 72 +name = "CHANGINGTRANSCAB" +description = "Changing Cabinet Transaction" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 73 +name = "NOMINALUPDATE" +description = "Nominal price update" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 74 +name = "PITSETTLEMENT" +description = '''Sent with a "pit session" settlement price to the electronic session, for the purpose of computing net change from the next day electronic session and the prior session settlement price.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 75 +name = "BLOCKTRADE" +description = "An executed trade of a large number of shares, typically 10,000 shares or more." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 76 +name = "EXGFORPHYSICAL" +description = "Exchange Future for Physical" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 77 +name = "VOLUMEADJUSTMENT" +description = "An adjustment made to the cumulative trading volume for a trading session." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 78 +name = "VOLATILITYTRADE" +description = "Volatility trade" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 79 +name = "YELLOWFLAG" +description = "Appears when reporting exchnge may be experiencing technical difficulties." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 80 +name = "FLOORPRICE" +description = "Distinguishes a floor Bid/Ask from a member Bid Ask on LME" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 81 +name = "OFFICIALPRICE" +description = "Official bid/ask price used by LME." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 82 +name = "UNOFFICIALPRICE" +description = "Unofficial bid/ask price used by LME." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 83 +name = "MIDBIDASKPRICE" +description = "A price halfway between the bid and ask on LME." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 84 +name = "ENDSESSIONHIGH" +description = "End of Session High Price." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = true +low = false +last = false + +[[trade]] +code = 85 +name = "ENDSESSIONLOW" +description = "End of Session Low Price." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = true +last = false + +[[trade]] +code = 86 +name = "BACKWARDATION" +description = "A condition where the immediate delivery price is higher than the future delivery price. Opposite of Contango." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 87 +name = "CONTANGO" +description = "A condition where the future delivery price is higher than the immediate delivery price. Opposite of Backwardation." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 88 +name = "HOLIDAY" +description = "In Development" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 89 +name = "PREOPENING" +description = "The period of time prior to the market opening time (7:00 A.M. - 9:30 A.M.) during which orders are entered into the market for the Opening." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 90 +name = "POSTFULL" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 91 +name = "POSTRESTRICTED" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 92 +name = "CLOSINGAUCTION" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 93 +name = "BATCH" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 94 +name = "TRADING" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 95 +name = "INTERMARKETSWEEP" +description = '''A trade resulting from an Intermarket Sweep Order Execution. For more information on intermarket sweeps, please see the SEC NMS regulation (June 29, 2005 - PDF) . From that report: "The intermarket sweep exception enables trading centers that receive sweep orders to execute those orders immediately, without waiting for betterpriced quotations in other markets to be updated." [URL1: https://www.sec.gov/rules/final/34-51808fr.pdf]''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 96 +name = "DERIVATIVE" +description = "Derivatively priced." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 97 +name = "REOPENING" +description = "Market center re-opening prints." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 98 +name = "CLOSING" +description = "Market center closing prints. Can be used to get closing auction information for exchanges that report it, such as NYSE." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 99 +name = "CAPELECTION" +description = "CTA Docs 78, 110, 111, 113 & 136 Redefined: Existing code 'I' in the Sale Condition field to denote the following change in value: From - Cap Election Trade To - Odd Lot Trade A trade resulting from a sweep execution where CAP orders were elected and executed outside the best bid or offer and appear as repeat trades. 
[URL1: https://www.ctaplan.com/publicdocs/ctaplan/notifications/announcements/trader-update/5871.pdf]" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = false + +[[trade]] +code = 100 +name = "SPOTSETTLEMENT" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 101 +name = "BASISHIGH" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = false + +[[trade]] +code = 102 +name = "BASISLOW" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = false + +[[trade]] +code = 103 +name = "YIELD" +description = "Applies to bid and ask yield updates for Cantor Treasuries" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 104 +name = "PRICEVARIATION" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 105 +name = "CONTINGENTTRADEFORMERLYSTOCKOPTION" +description = "Effective July 2015 ~ A Sale Condition used to identify a transaction where the execution of the transaction is contingent upon some event. SIAC Trader Update: February 25, 2015 (PDF) Previously: StockOption [URL1: https://www.ctaplan.com/publicdocs/ctaplan/notifications/announcements/trader-update/6113.pdf]" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 106 +name = "STOPPEDIM" +description = "Transaction order which was stopped at a price that did not constitute a Trade-Through on another market. 
Valid trade do not update last" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = false + +[[trade]] +code = 107 +name = "BENCHMARK" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 108 +name = "TRADETHRUEXEMPT" +description = "This condition will be assigned for Tapes A/B and UTP when no Trade Through Exempt reason is given, and the Trade Through Exempt indicator is set. For Tapes A/B and UTP, these trades are eligible to update O/H/L/L/V. For OPRA, these trades only update volume." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = true + +[[trade]] +code = 109 +name = "IMPLIED" +description = "These trades are result of a spread trade. The exchange sends a leg price on each future for spread transactions. These trades do not update O/H/L/L but they update volume. We are now sending these spread trades for Globex exchanges: CME, NYMEX, COMEX, CBOT, MGE, KCBT and DME." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 110 +name = "OTC" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 111 +name = "MKTSUPERVISION" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 112 +name = "RESERVED_77" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 113 +name = "RESERVED_91" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 114 +name = "CONTINGENTUTP" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 115 +name = "ODDLOT" +description = "This indicates any trade with size between 1-99." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 116 +name = "RESERVED_89" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 117 +name = "CORRECTEDCSLAST" +description = "This allows for a mechanism to correct the official close on the consolidated tape." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = true +low = true +last = true + +[[trade]] +code = 118 +name = "OPRAEXTHOURS" +description = '''OPRA extended trading hours session. 
Equivalent to the OPRA "Session Indicator" with ASCII value of 'X' (Pre-Market extended hours trading session)(Obselete, see condition 148).''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 119 +name = "RESERVED_78" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 120 +name = "RESERVED_81" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 121 +name = "RESERVED_84" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 122 +name = "RESERVED_878" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 123 +name = "RESERVED_90" +description = "" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = false +high = false +low = false +last = false + +[[trade]] +code = 124 +name = "QUALIFIEDCONTINGENTTRADE" +description = "Effective July 2015 ~ A transaction consisting of two or more component orders, executed as agent or principal, that meets each of the following elements: At least one component order is for an NMS stock. All components are effected with a product or price contingency that either has been agreed to by the respective counterparties or arranged for by a broker-dealer as principal or agent. The execution of one component is contingent upon the execution of all other components at or near the same time. The specific relationship between the component orders ( e.g. 
the spread between the prices of the component orders) is determined at the time the contingent order is placed. The component orders bear a derivative relationship to one another, represent different classes of shares of the same issuer, or involve the securities of participants in mergers or with intentions to merge that have been announced or since cancelled. 25 The Exempted NMS Stock Transaction is fully hedged (without regard to any prior existing position) as a result of the other components of the contingent trade. 26 [URL1: https://www.sec.gov/divisions/marketreg/nmsfaq610-11.htm#P318_53173] [URL2: https://www.sec.gov/divisions/marketreg/nmsfaq610-11.htm#P319_53923]" +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 125 +name = "SINGLELEGAUCTIONNONISO" +description = '''Transaction was the execution of an electronic order which was "stopped" at a price and traded in a two sided auction mechanism that goes through an exposure period. Such auctions mechanisms include and not limited to Price Improvement, Facilitation or Solicitation Mechanism.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 126 +name = "SINGLELEGAUCTIONISO" +description = '''Transaction was the execution of an Intermarket Sweep electronic order which was "stopped" at a price and traded in a two sided auction mechanism that goes through an exposure period. 
Such auctions mechanisms include and not limited to Price Improvement, Facilitation or Solicitation Mechanism marked as ISO.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 127 +name = "SINGLELEGCROSSNONISO" +description = '''Transaction was the execution of an electronic order which was "stopped" at a price and traded in a two sided crossing mechanism that does not go through an exposure period. Such crossing mechanisms include and not limited to Customer to Customer Cross and QCC with a single option leg.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 128 +name = "SINGLELEGCROSSISO" +description = '''Transaction was the execution of an Intermarket Sweep electronic order which was "stopped" at a price and traded in a two sided crossing mechanism that does not go through an exposure period. Such crossing mechanisms include and not limited to Customer to Customer Cross.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 129 +name = "SINGLELEGFLOORTRADE" +description = "Transaction represents a non-electronic trade executed on a trading floor. Execution of Paired and Non-Paired Auctions and Cross orders on an exchange floor are also included in this category." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 130 +name = "MULTILEGAUTOELECTRONICTRADE" +description = "Transaction represents an electronic execution of a multi leg order traded in a complex order book." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 131 +name = "MULTILEGAUCTION" +description = '''Transaction was the execution of an electronic multi leg order which was "stopped" at a price and traded in a two sided auction mechanism that goes through an exposure period in a complex order book. Such auctions mechanisms include and not limited to Price Improvement, Facilitation or Solicitation Mechanism.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 132 +name = "MULTILEGCROSS" +description = '''Transaction was the execution of an electronic multi leg order which was "stopped" at a price and traded in a two sided crossing mechanism that does not go through an exposure period. Such crossing mechanisms include and not limited to Customer to Customer Cross and QCC with two or more options legs.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 133 +name = "MULTILEGFLOORTRADE" +description = "Transaction represents a non-electronic multi leg order trade executed against other multi-leg order(s) on a trading floor. Execution of Paired and Non-Paired Auctions and Cross orders on an exchange floor are also included in this category." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 134 +name = "MULTILEGAUTOELECTRADEAGAINSTSINGLELEG" +description = "Transaction represents an electronic execution of a multi Leg order traded against single leg orders/quotes." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 135 +name = "STOCKOPTIONSAUCTION" +description = '''Transaction was the execution of an electronic multi leg stock/options order which was "stopped" at a price and traded in a two sided auction mechanism that goes through an exposure period in a complex order book. Such auctions mechanisms include and not limited to Price Improvement, Facilitation or Solicitation Mechanism.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 136 +name = "MULTILEGAUCTIONAGAINSTSINGLELEG" +description = '''Transaction was the execution of an electronic multi leg order which was "stopped" at a price and traded in a two sided auction mechanism that goes through an exposure period and trades against single leg orders/ quotes. Such auctions mechanisms include and not limited to Price Improvement, Facilitation or Solicitation Mechanism.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 137 +name = "MULTILEGFLOORTRADEAGAINSTSINGLELEG" +description = "Transaction represents a non-electronic multi leg order trade executed on a trading floor against single leg orders/ quotes. Execution of Paired and Non-Paired Auctions on an exchange floor are also included in this category." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 138 +name = "STOCKOPTIONSAUTOELECTRADE" +description = "Transaction represents an electronic execution of a multi leg stock/options order traded in a complex order book." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 139 +name = "STOCKOPTIONSCROSS" +description = '''Transaction was the execution of an electronic multi leg stock/options order which was "stopped" at a price and traded in a two sided crossing mechanism that does not go through an exposure period. Such crossing mechanisms include and not limited to Customer to Customer Cross.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 140 +name = "STOCKOPTIONSFLOORTRADE" +description = "Transaction represents a non-electronic multi leg order stock/options trade executed on a trading floor in a Complex order book. Execution of Paired and Non-Paired Auctions and Cross orders on an exchange floor are also included in this category." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 141 +name = "STOCKOPTIONSAUTOELECTRADEAGAINSTSINGLELEG" +description = "Transaction represents an electronic execution of a multi Leg stock/options order traded against single leg orders/quotes." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 142 +name = "STOCKOPTIONSAUCTIONAGAINSTSINGLELEG" +description = '''Transaction was the execution of an electronic multi leg stock/options order which was "stopped" at a price and traded in a two sided auction mechanism that goes through an exposure periodand trades against single leg orders/ quotes. 
Such auctions mechanisms include and not limited to Price Improvement, Facilitation or Solicitation Mechanism.''' +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 143 +name = "STOCKOPTIONSFLOORTRADEAGAINSTSINGLELEG" +description = "Transaction represents a non-electronic multi leg stock/options order trade executed on a trading floor against single leg orders/ quotes. Execution of Paired and Non-Paired Auctions on an exchange floor are also included in this category." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 144 +name = "MULTILEGFLOORTRADEOFPROPRIETARYPRODUCTS" +description = "Transaction represents execution of a proprietary product non-electronic multi leg order with at least 3 legs. The trade price may be outside the current NBBO." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 145 +name = "BIDAGGRESSOR" +description = "Aggressor of the trade is on the buy side." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 146 +name = "ASKAGGRESSOR" +description = "Aggressor of the trade is on the sell side." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = true +low = true +last = true + +[[trade]] +code = 147 +name = "MULTILATERALCOMPRESSIONTRADEOFPROPRIETARYDATAPRODUCTS" +description = "Transaction represents an execution in a proprietary product done as part of a multilateral compression. Trades are executed outside of regular trading hours at prices derived from end of day markets." 
+cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false + +[[trade]] +code = 148 +name = "EXTENDEDHOURSTRADE" +description = "Transaction represents a trade that was executed outside of regular market hours." +cancel = false +late_report = false +auto_executed = false +open_report = false +volume = true +high = false +low = false +last = false diff --git a/crates/tdbe/src/conditions/mod.rs b/crates/tdbe/src/conditions/mod.rs new file mode 100644 index 00000000..9244ef80 --- /dev/null +++ b/crates/tdbe/src/conditions/mod.rs @@ -0,0 +1,426 @@ +//! Trade and quote condition lookup tables for `ThetaData` market data. +//! +//! Source-of-truth lives in `crates/tdbe/data/{trade,quote}_conditions.toml`. +//! At build time, `build.rs` reads the TOML and regenerates +//! `tables_generated.rs`, which is committed so consumers building from +//! crates.io don't need to re-run codegen. +//! +//! The public surface — [`TradeCondition`], [`QuoteCondition`], +//! [`TRADE_CONDITIONS`], [`QUOTE_CONDITIONS`], and the eight lookup +//! functions — is unchanged from the previous monolithic +//! `conditions.rs`. + +mod tables_generated; + +pub use tables_generated::{QUOTE_CONDITIONS, TRADE_CONDITIONS}; + +// ─────────────────────────────────────────────────────────────────────── +// Trade conditions +// ─────────────────────────────────────────────────────────────────────── + +/// A trade condition code with its properties. +// Reason: 8 booleans match the exchange specification flags 1:1. 
+#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct TradeCondition { + pub code: i32, + pub name: &'static str, + pub description: &'static str, + pub cancel: bool, + pub late_report: bool, + pub auto_executed: bool, + pub open_report: bool, + pub volume: bool, + pub high: bool, + pub low: bool, + pub last: bool, +} + +/// Look up the human-readable name for a trade condition code. +/// +/// Returns `"UNKNOWN"` for codes outside the known range. +#[inline] +#[must_use] +pub fn condition_name(code: i32) -> &'static str { + usize::try_from(code) + .ok() + .filter(|&idx| idx < TRADE_CONDITIONS.len()) + .map_or("UNKNOWN", |idx| TRADE_CONDITIONS[idx].name) +} + +/// Look up the description for a trade condition code. +/// +/// Returns `""` for codes outside the known range. +/// O(1) array-index lookup. +#[inline] +#[must_use] +pub fn condition_description(code: i32) -> &'static str { + usize::try_from(code) + .ok() + .filter(|&idx| idx < TRADE_CONDITIONS.len()) + .map_or("", |idx| TRADE_CONDITIONS[idx].description) +} + +/// True if this trade condition code represents a cancellation. +#[inline] +#[must_use] +pub fn is_cancel(code: i32) -> bool { + usize::try_from(code) + .ok() + .filter(|&idx| idx < TRADE_CONDITIONS.len()) + .is_some_and(|idx| TRADE_CONDITIONS[idx].cancel) +} + +/// True if this trade condition updates volume. +#[inline] +#[must_use] +pub fn updates_volume(code: i32) -> bool { + usize::try_from(code) + .ok() + .filter(|&idx| idx < TRADE_CONDITIONS.len()) + .is_some_and(|idx| TRADE_CONDITIONS[idx].volume) +} + +// ─────────────────────────────────────────────────────────────────────── +// Quote conditions +// ─────────────────────────────────────────────────────────────────────── + +/// A quote condition code with its properties. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct QuoteCondition { + pub code: i32, + pub name: &'static str, + pub description: &'static str, + pub firm: bool, + pub halted: bool, +} + +/// Look up the human-readable name for a quote condition code. +/// +/// Returns `"UNKNOWN"` for codes outside the known range. +#[inline] +#[must_use] +pub fn quote_condition_name(code: i32) -> &'static str { + usize::try_from(code) + .ok() + .filter(|&idx| idx < QUOTE_CONDITIONS.len()) + .map_or("UNKNOWN", |idx| QUOTE_CONDITIONS[idx].name) +} + +/// Look up the description for a quote condition code. +/// +/// Returns `""` for codes outside the known range. +#[inline] +#[must_use] +pub fn quote_condition_description(code: i32) -> &'static str { + usize::try_from(code) + .ok() + .filter(|&idx| idx < QUOTE_CONDITIONS.len()) + .map_or("", |idx| QUOTE_CONDITIONS[idx].description) +} + +/// True if this quote condition is firm (binding). +#[inline] +#[must_use] +pub fn is_firm(code: i32) -> bool { + usize::try_from(code) + .ok() + .filter(|&idx| idx < QUOTE_CONDITIONS.len()) + .is_some_and(|idx| QUOTE_CONDITIONS[idx].firm) +} + +/// True if this quote condition indicates a trading halt. 
+#[inline] +#[must_use] +pub fn is_halted(code: i32) -> bool { + usize::try_from(code) + .ok() + .filter(|&idx| idx < QUOTE_CONDITIONS.len()) + .is_some_and(|idx| QUOTE_CONDITIONS[idx].halted) +} + +#[cfg(test)] +mod tests { + use super::*; + + // Trade condition tests + #[test] + fn trade_condition_name_valid() { + assert_eq!(condition_name(0), "REGULAR"); + assert_eq!(condition_name(40), "CANC"); + assert_eq!(condition_name(148), "EXTENDEDHOURSTRADE"); + } + + #[test] + fn trade_condition_name_out_of_range() { + assert_eq!(condition_name(-1), "UNKNOWN"); + assert_eq!(condition_name(149), "UNKNOWN"); + assert_eq!(condition_name(9999), "UNKNOWN"); + } + + #[test] + fn trade_condition_description_valid() { + assert_eq!(condition_description(0), "Regular Trade"); + assert!(condition_description(13).contains("Sets Consolidated Last")); + assert!(condition_description(5).contains("update last if only trade")); + } + + #[test] + fn trade_condition_description_out_of_range() { + assert_eq!(condition_description(-1), ""); + assert_eq!(condition_description(149), ""); + } + + #[test] + fn trade_is_cancel() { + assert!(!is_cancel(0)); + assert!(is_cancel(40)); + assert!(is_cancel(41)); + assert!(is_cancel(42)); + assert!(is_cancel(43)); + assert!(is_cancel(44)); + assert!(!is_cancel(45)); + } + + #[test] + fn trade_updates_volume() { + assert!(updates_volume(0)); + assert!(updates_volume(1)); + assert!(!updates_volume(40)); + } + + #[test] + fn trade_array_codes_contiguous() { + for (i, tc) in TRADE_CONDITIONS.iter().enumerate() { + assert_eq!( + tc.code as usize, i, + "Trade condition at index {} has code {}", + i, tc.code + ); + } + } + + #[test] + fn all_149_trade_conditions_present() { + assert_eq!(TRADE_CONDITIONS.len(), 149); + } + + // Quote condition tests + #[test] + fn quote_condition_name_valid() { + assert_eq!(quote_condition_name(0), "REGULAR"); + assert_eq!(quote_condition_name(17), "HALTED"); + assert_eq!(quote_condition_name(50), "NATIONAL_BBO"); + 
assert_eq!(quote_condition_name(74), "RETAIL_QTE"); + } + + #[test] + fn quote_condition_name_out_of_range() { + assert_eq!(quote_condition_name(-1), "UNKNOWN"); + assert_eq!(quote_condition_name(75), "UNKNOWN"); + } + + #[test] + fn quote_condition_description_valid() { + assert_eq!(quote_condition_description(0), "Regular two-sided quote"); + assert_eq!(quote_condition_description(17), "Trading halted"); + assert!(quote_condition_description(66).contains("Level 1")); + } + + #[test] + fn quote_condition_description_out_of_range() { + assert_eq!(quote_condition_description(-1), ""); + assert_eq!(quote_condition_description(75), ""); + } + + #[test] + fn quote_is_firm() { + assert!(is_firm(0)); + assert!(!is_firm(17)); + } + + #[test] + fn quote_is_halted() { + assert!(!is_halted(0)); + assert!(is_halted(17)); + assert!(is_halted(18)); + } + + #[test] + fn quote_array_codes_contiguous() { + for (i, qc) in QUOTE_CONDITIONS.iter().enumerate() { + assert_eq!( + qc.code as usize, i, + "Quote condition at index {} has code {}", + i, qc.code + ); + } + } + + #[test] + fn all_75_quote_conditions_present() { + assert_eq!(QUOTE_CONDITIONS.len(), 75); + } + + #[test] + fn all_trade_descriptions_have_content_where_expected() { + // Codes that must have non-empty descriptions (key market-critical ones) + let must_have_desc = [0, 1, 2, 5, 13, 40, 95, 148]; + for &code in &must_have_desc { + assert!( + !condition_description(code).is_empty(), + "Trade condition {} should have a description", + code + ); + } + } + + #[test] + fn all_quote_descriptions_have_content() { + for (i, qc) in QUOTE_CONDITIONS.iter().enumerate() { + assert!( + !qc.description.is_empty(), + "Quote condition {} ({}) should have a description", + i, + qc.name + ); + } + } + + /// Round-trip pinning test: 12 entries copied verbatim from the + /// pre-codegen `conditions.rs` (commit `bf5f8bc`). 
Catches any + /// drift between the TOML source-of-truth and the generated + /// `TRADE_CONDITIONS` / `QUOTE_CONDITIONS` arrays. + /// + /// If you intentionally change a condition entry, update the TOML + /// AND update the corresponding pin below in the same commit. + #[test] + fn condition_tables_pin() { + // ----- Trade pins ----- + + let t0 = TRADE_CONDITIONS[0]; + assert_eq!(t0.code, 0); + assert_eq!(t0.name, "REGULAR"); + assert_eq!(t0.description, "Regular Trade"); + assert_eq!( + ( + t0.cancel, + t0.late_report, + t0.auto_executed, + t0.open_report, + t0.volume, + t0.high, + t0.low, + t0.last + ), + (false, false, false, false, true, true, true, true) + ); + + let t30 = TRADE_CONDITIONS[30]; + assert_eq!(t30.code, 30); + assert_eq!(t30.name, "DISTRIBUTION"); + assert_eq!( + t30.description, + "Sale of a large block of stock in a way that price is not adversely affected." + ); + assert_eq!( + ( + t30.cancel, + t30.late_report, + t30.auto_executed, + t30.open_report, + t30.volume, + t30.high, + t30.low, + t30.last + ), + (false, false, false, false, true, true, true, true) + ); + + let t40 = TRADE_CONDITIONS[40]; + assert_eq!(t40.code, 40); + assert_eq!(t40.name, "CANC"); + assert!(t40.cancel); + assert!(!t40.volume); + assert!(t40 + .description + .starts_with("Cancel a previously reported trade")); + + let t60 = TRADE_CONDITIONS[60]; + assert_eq!(t60.code, 60); + assert_eq!(t60.name, "SPECIALSESSION"); + assert_eq!( + ( + t60.cancel, + t60.late_report, + t60.auto_executed, + t60.open_report, + t60.volume, + t60.high, + t60.low, + t60.last + ), + (false, false, false, false, true, false, false, false) + ); + + let t90 = TRADE_CONDITIONS[90]; + assert_eq!(t90.code, 90); + assert_eq!(t90.name, "POSTFULL"); + assert_eq!(t90.description, ""); + assert_eq!( + ( + t90.cancel, + t90.late_report, + t90.auto_executed, + t90.open_report, + t90.volume, + t90.high, + t90.low, + t90.last + ), + (false, false, false, false, false, false, false, false) + ); + + let t120 = 
TRADE_CONDITIONS[120]; + assert_eq!(t120.code, 120); + assert_eq!(t120.name, "RESERVED_81"); + assert_eq!(t120.description, ""); + + let t148 = TRADE_CONDITIONS[148]; + assert_eq!(t148.code, 148); + assert_eq!(t148.name, "EXTENDEDHOURSTRADE"); + assert!(t148.volume); + assert!(!t148.last); + + // Code 61 was the post-rename `PRICEVOLUMEADJ` per the prior + // refactor; pin it to defend the rename. + let t61 = TRADE_CONDITIONS[61]; + assert_eq!(t61.code, 61); + assert_eq!(t61.name, "PRICEVOLUMEADJ"); + + // ----- Quote pins ----- + + let q0 = QUOTE_CONDITIONS[0]; + assert_eq!(q0.code, 0); + assert_eq!(q0.name, "REGULAR"); + assert_eq!(q0.description, "Regular two-sided quote"); + assert_eq!((q0.firm, q0.halted), (true, false)); + + let q17 = QUOTE_CONDITIONS[17]; + assert_eq!(q17.code, 17); + assert_eq!(q17.name, "HALTED"); + assert_eq!(q17.description, "Trading halted"); + assert_eq!((q17.firm, q17.halted), (false, true)); + + let q50 = QUOTE_CONDITIONS[50]; + assert_eq!(q50.code, 50); + assert_eq!(q50.name, "NATIONAL_BBO"); + assert_eq!(q50.description, "National best bid and offer"); + + let q74 = QUOTE_CONDITIONS[74]; + assert_eq!(q74.code, 74); + assert_eq!(q74.name, "RETAIL_QTE"); + assert_eq!(q74.description, "Retail interest on both sides"); + assert_eq!((q74.firm, q74.halted), (false, false)); + } +} diff --git a/crates/tdbe/src/conditions.rs b/crates/tdbe/src/conditions/tables_generated.rs similarity index 89% rename from crates/tdbe/src/conditions.rs rename to crates/tdbe/src/conditions/tables_generated.rs index b6732e0a..f2f7e1a8 100644 --- a/crates/tdbe/src/conditions.rs +++ b/crates/tdbe/src/conditions/tables_generated.rs @@ -1,29 +1,9 @@ -//! Trade and quote condition lookup tables for `ThetaData` market data. -//! -//! Combines trade conditions (149 codes) and quote conditions (75 codes) -//! into a single module with simple index-based lookups. 
+// @generated DO NOT EDIT — regenerated by build.rs from data/*.toml +// +// Source-of-truth: crates/tdbe/data/trade_conditions.toml +// crates/tdbe/data/quote_conditions.toml -// ─────────────────────────────────────────────────────────────────────── -// Trade conditions -// ─────────────────────────────────────────────────────────────────────── - -/// A trade condition code with its properties. -// Reason: 8 booleans match the exchange specification flags 1:1. -#[allow(clippy::struct_excessive_bools)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct TradeCondition { - pub code: i32, - pub name: &'static str, - pub description: &'static str, - pub cancel: bool, - pub late_report: bool, - pub auto_executed: bool, - pub open_report: bool, - pub volume: bool, - pub high: bool, - pub low: bool, - pub last: bool, -} +use super::{QuoteCondition, TradeCondition}; /// All 149 trade condition codes (0..148). pub const TRADE_CONDITIONS: [TradeCondition; 149] = [ @@ -1966,65 +1946,6 @@ pub const TRADE_CONDITIONS: [TradeCondition; 149] = [ }, ]; -/// Look up the human-readable name for a trade condition code. -/// -/// Returns `"UNKNOWN"` for codes outside the known range. -#[inline] -#[must_use] -pub fn condition_name(code: i32) -> &'static str { - usize::try_from(code) - .ok() - .filter(|&idx| idx < TRADE_CONDITIONS.len()) - .map_or("UNKNOWN", |idx| TRADE_CONDITIONS[idx].name) -} - -/// Look up the description for a trade condition code. -/// -/// Returns `""` for codes outside the known range. -/// O(1) array-index lookup. -#[inline] -#[must_use] -pub fn condition_description(code: i32) -> &'static str { - usize::try_from(code) - .ok() - .filter(|&idx| idx < TRADE_CONDITIONS.len()) - .map_or("", |idx| TRADE_CONDITIONS[idx].description) -} - -/// True if this trade condition code represents a cancellation. 
-#[inline] -#[must_use] -pub fn is_cancel(code: i32) -> bool { - usize::try_from(code) - .ok() - .filter(|&idx| idx < TRADE_CONDITIONS.len()) - .is_some_and(|idx| TRADE_CONDITIONS[idx].cancel) -} - -/// True if this trade condition updates volume. -#[inline] -#[must_use] -pub fn updates_volume(code: i32) -> bool { - usize::try_from(code) - .ok() - .filter(|&idx| idx < TRADE_CONDITIONS.len()) - .is_some_and(|idx| TRADE_CONDITIONS[idx].volume) -} - -// ─────────────────────────────────────────────────────────────────────── -// Quote conditions -// ─────────────────────────────────────────────────────────────────────── - -/// A quote condition code with its properties. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct QuoteCondition { - pub code: i32, - pub name: &'static str, - pub description: &'static str, - pub firm: bool, - pub halted: bool, -} - /// All 75 quote condition codes (0..74). pub const QUOTE_CONDITIONS: [QuoteCondition; 75] = [ QuoteCondition { @@ -2553,197 +2474,3 @@ pub const QUOTE_CONDITIONS: [QuoteCondition; 75] = [ halted: false, }, ]; - -/// Look up the human-readable name for a quote condition code. -/// -/// Returns `"UNKNOWN"` for codes outside the known range. -#[inline] -#[must_use] -pub fn quote_condition_name(code: i32) -> &'static str { - usize::try_from(code) - .ok() - .filter(|&idx| idx < QUOTE_CONDITIONS.len()) - .map_or("UNKNOWN", |idx| QUOTE_CONDITIONS[idx].name) -} - -/// Look up the description for a quote condition code. -/// -/// Returns `""` for codes outside the known range. -/// O(1) array-index lookup. -#[inline] -#[must_use] -pub fn quote_condition_description(code: i32) -> &'static str { - usize::try_from(code) - .ok() - .filter(|&idx| idx < QUOTE_CONDITIONS.len()) - .map_or("", |idx| QUOTE_CONDITIONS[idx].description) -} - -/// True if this quote condition represents a firm quote. 
-#[inline] -#[must_use] -pub fn is_firm(code: i32) -> bool { - usize::try_from(code) - .ok() - .filter(|&idx| idx < QUOTE_CONDITIONS.len()) - .is_some_and(|idx| QUOTE_CONDITIONS[idx].firm) -} - -/// True if this quote condition indicates a trading halt. -#[inline] -#[must_use] -pub fn is_halted(code: i32) -> bool { - usize::try_from(code) - .ok() - .filter(|&idx| idx < QUOTE_CONDITIONS.len()) - .is_some_and(|idx| QUOTE_CONDITIONS[idx].halted) -} - -#[cfg(test)] -mod tests { - use super::*; - - // Trade condition tests - #[test] - fn trade_condition_name_valid() { - assert_eq!(condition_name(0), "REGULAR"); - assert_eq!(condition_name(40), "CANC"); - assert_eq!(condition_name(148), "EXTENDEDHOURSTRADE"); - } - - #[test] - fn trade_condition_name_out_of_range() { - assert_eq!(condition_name(-1), "UNKNOWN"); - assert_eq!(condition_name(149), "UNKNOWN"); - assert_eq!(condition_name(9999), "UNKNOWN"); - } - - #[test] - fn trade_condition_description_valid() { - assert_eq!(condition_description(0), "Regular Trade"); - assert!(condition_description(13).contains("Sets Consolidated Last")); - assert!(condition_description(5).contains("update last if only trade")); - } - - #[test] - fn trade_condition_description_out_of_range() { - assert_eq!(condition_description(-1), ""); - assert_eq!(condition_description(149), ""); - } - - #[test] - fn trade_is_cancel() { - assert!(!is_cancel(0)); - assert!(is_cancel(40)); - assert!(is_cancel(41)); - assert!(is_cancel(42)); - assert!(is_cancel(43)); - assert!(is_cancel(44)); - assert!(!is_cancel(45)); - } - - #[test] - fn trade_updates_volume() { - assert!(updates_volume(0)); - assert!(updates_volume(1)); - assert!(!updates_volume(40)); - } - - #[test] - fn trade_array_codes_contiguous() { - for (i, tc) in TRADE_CONDITIONS.iter().enumerate() { - assert_eq!( - tc.code as usize, i, - "Trade condition at index {} has code {}", - i, tc.code - ); - } - } - - #[test] - fn all_149_trade_conditions_present() { - 
assert_eq!(TRADE_CONDITIONS.len(), 149); - } - - // Quote condition tests - #[test] - fn quote_condition_name_valid() { - assert_eq!(quote_condition_name(0), "REGULAR"); - assert_eq!(quote_condition_name(17), "HALTED"); - assert_eq!(quote_condition_name(50), "NATIONAL_BBO"); - assert_eq!(quote_condition_name(74), "RETAIL_QTE"); - } - - #[test] - fn quote_condition_name_out_of_range() { - assert_eq!(quote_condition_name(-1), "UNKNOWN"); - assert_eq!(quote_condition_name(75), "UNKNOWN"); - } - - #[test] - fn quote_condition_description_valid() { - assert_eq!(quote_condition_description(0), "Regular two-sided quote"); - assert_eq!(quote_condition_description(17), "Trading halted"); - assert!(quote_condition_description(66).contains("Level 1")); - } - - #[test] - fn quote_condition_description_out_of_range() { - assert_eq!(quote_condition_description(-1), ""); - assert_eq!(quote_condition_description(75), ""); - } - - #[test] - fn quote_is_firm() { - assert!(is_firm(0)); - assert!(!is_firm(17)); - } - - #[test] - fn quote_is_halted() { - assert!(!is_halted(0)); - assert!(is_halted(17)); - assert!(is_halted(18)); - } - - #[test] - fn quote_array_codes_contiguous() { - for (i, qc) in QUOTE_CONDITIONS.iter().enumerate() { - assert_eq!( - qc.code as usize, i, - "Quote condition at index {} has code {}", - i, qc.code - ); - } - } - - #[test] - fn all_75_quote_conditions_present() { - assert_eq!(QUOTE_CONDITIONS.len(), 75); - } - - #[test] - fn all_trade_descriptions_have_content_where_expected() { - // Codes that must have non-empty descriptions (key market-critical ones) - let must_have_desc = [0, 1, 2, 5, 13, 40, 95, 148]; - for &code in &must_have_desc { - assert!( - !condition_description(code).is_empty(), - "Trade condition {} should have a description", - code - ); - } - } - - #[test] - fn all_quote_descriptions_have_content() { - for (i, qc) in QUOTE_CONDITIONS.iter().enumerate() { - assert!( - !qc.description.is_empty(), - "Quote condition {} ({}) should have a 
description", - i, - qc.name - ); - } - } -} diff --git a/crates/tdbe/src/latency.rs b/crates/tdbe/src/latency.rs index 97b66f29..f23f33bb 100644 --- a/crates/tdbe/src/latency.rs +++ b/crates/tdbe/src/latency.rs @@ -4,8 +4,10 @@ //! (YYYYMMDD) into epoch nanoseconds, then subtracts from the local //! `received_at_ns` wall-clock timestamp captured at frame decode time. //! -//! No external timezone crate -- uses the same civil-date math and US DST -//! rules (Energy Policy Act 2005) as `thetadatadx::decode`. +//! Civil-date / DST primitives live in [`crate::time`]; this module is a +//! thin wrapper that adds the YYYYMMDD-and-`ms_of_day` decomposition. + +use crate::time::{civil_to_epoch_days, eastern_offset_ms}; /// Compute wire-to-application latency in nanoseconds. /// @@ -52,88 +54,6 @@ fn exchange_epoch_ns(ms_of_day: i32, date_yyyymmdd: i32) -> i64 { exchange_epoch_ms * 1_000_000 } -// --------------------------------------------------------------------------- -// Civil-date / DST helpers (same algorithm as thetadatadx::decode) -// --------------------------------------------------------------------------- - -/// Convert civil date to days since 1970-01-01 (Euclidean algorithm). -// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions -// that are safe for all valid calendar dates (year 0..9999). -#[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)] -fn civil_to_epoch_days(year: i32, month: u32, day: u32) -> i64 { - let y = if month <= 2 { - i64::from(year) - 1 - } else { - i64::from(year) - }; - let m = if month <= 2 { - i64::from(month) + 9 - } else { - i64::from(month) - 3 - }; - let era = if y >= 0 { y } else { y - 399 } / 400; - let yoe = (y - era * 400) as u64; - let doy = (153 * m as u64 + 2) / 5 + u64::from(day) - 1; - let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; - era * 146_097 + doe as i64 - 719_468 -} - -/// Eastern Time UTC offset in milliseconds for a given `epoch_ms`. 
-/// -/// US DST rule (Energy Policy Act of 2005): -/// - EDT (UTC-4): second Sunday of March 2:00 AM local -> first Sunday of November 2:00 AM local -/// - EST (UTC-5): rest of the year -// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions -// that are safe for all valid epoch timestamps in the market data date range. -#[allow( - clippy::cast_possible_wrap, - clippy::cast_sign_loss, - clippy::cast_possible_truncation -)] -fn eastern_offset_ms(epoch_ms: u64) -> i64 { - let epoch_secs = epoch_ms as i64 / 1_000; - let days_since_epoch = epoch_secs / 86_400; - - // Civil date from days since 1970-01-01. - let z = days_since_epoch + 719_468; - let era = if z >= 0 { z } else { z - 146_096 } / 146_097; - let doe = (z - era * 146_097) as u32; - let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365; - let year = yoe as i32 + (era * 400) as i32; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let month = if mp < 10 { mp + 3 } else { mp - 9 }; - let year = if month <= 2 { year + 1 } else { year }; - - let dst_start_utc = march_second_sunday_utc(year); - let dst_end_utc = november_first_sunday_utc(year); - - let epoch_ms_i64 = epoch_ms as i64; - if epoch_ms_i64 >= dst_start_utc && epoch_ms_i64 < dst_end_utc { - -4 * 3_600 * 1_000 // EDT - } else { - -5 * 3_600 * 1_000 // EST - } -} - -/// Epoch ms of the second Sunday of March at 7:00 AM UTC (= 2:00 AM EST). -fn march_second_sunday_utc(year: i32) -> i64 { - let mar1 = civil_to_epoch_days(year, 3, 1); - let dow = ((mar1 + 3) % 7 + 7) % 7; // 0=Mon..6=Sun - let days_to_first_sunday = (6 - dow + 7) % 7; - let second_sunday = mar1 + days_to_first_sunday + 7; - second_sunday * 86_400_000 + 7 * 3_600 * 1_000 -} - -/// Epoch ms of the first Sunday of November at 6:00 AM UTC (= 2:00 AM EDT). 
-fn november_first_sunday_utc(year: i32) -> i64 { - let nov1 = civil_to_epoch_days(year, 11, 1); - let dow = ((nov1 + 3) % 7 + 7) % 7; - let days_to_first_sunday = (6 - dow + 7) % 7; - let first_sunday = nov1 + days_to_first_sunday; - first_sunday * 86_400_000 + 6 * 3_600 * 1_000 -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/tdbe/src/lib.rs b/crates/tdbe/src/lib.rs index 8abcf4eb..41931242 100644 --- a/crates/tdbe/src/lib.rs +++ b/crates/tdbe/src/lib.rs @@ -23,6 +23,7 @@ pub mod json_canon; pub mod latency; pub mod right; pub mod sequences; +pub mod time; pub mod types; // Convenience re-exports at crate root diff --git a/crates/tdbe/src/time.rs b/crates/tdbe/src/time.rs new file mode 100644 index 00000000..a67c3324 --- /dev/null +++ b/crates/tdbe/src/time.rs @@ -0,0 +1,245 @@ +//! Eastern Time + DST primitives. +//! +//! Canonical Eastern-time conversion module reused by `thetadatadx` (mdds +//! decode + flatfiles) and the `tdbe` latency path. No external timezone +//! crate dependencies — pure civil-date arithmetic with the documented US +//! DST rules. +//! +//! ## DST rules +//! +//! **2007-onward** (Energy Policy Act of 2005): +//! - EDT (UTC-4): second Sunday of March at 2:00 AM local -> first Sunday +//! of November at 2:00 AM local +//! - EST (UTC-5): rest of the year +//! +//! **Before 2007** (Uniform Time Act of 1966): +//! - EDT (UTC-4): first Sunday of April at 2:00 AM local -> last Sunday of +//! October at 2:00 AM local +//! - EST (UTC-5): rest of the year +//! +//! Transition points are computed in UTC and compared, so callers do not +//! need to round-trip through a timezone library. + +/// Eastern Time UTC offset in milliseconds for a given `epoch_ms`. +/// +/// Returns `-4 * 3_600_000` (EDT) when DST is in effect for the civil +/// year of `epoch_ms`; otherwise `-5 * 3_600_000` (EST). DST window +/// selection follows the rules documented at the module level. 
+// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions for valid epoch timestamps. +#[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::cast_possible_truncation +)] +#[must_use] +pub fn eastern_offset_ms(epoch_ms: u64) -> i64 { + // First, determine the UTC year/month/day to find DST boundaries. + let epoch_secs = epoch_ms as i64 / 1_000; + let days_since_epoch = epoch_secs / 86_400; + + // Civil date from days since 1970-01-01 (Euclidean algorithm). + let z = days_since_epoch + 719_468; + let era = if z >= 0 { z } else { z - 146_096 } / 146_097; + let doe = (z - era * 146_097) as u32; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; + let year = yoe as i32 + (era * 400) as i32; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let month = if mp < 10 { mp + 3 } else { mp - 9 }; + let year = if month <= 2 { year + 1 } else { year }; + + let (dst_start_utc, dst_end_utc) = if year >= 2007 { + // Post-2007: second Sunday of March -> first Sunday of November. + ( + march_second_sunday_utc(year), + november_first_sunday_utc(year), + ) + } else { + // Pre-2007: first Sunday of April -> last Sunday of October. + (april_first_sunday_utc(year), october_last_sunday_utc(year)) + }; + + let epoch_ms_i64 = epoch_ms as i64; + if epoch_ms_i64 >= dst_start_utc && epoch_ms_i64 < dst_end_utc { + -4 * 3_600 * 1_000 // EDT + } else { + -5 * 3_600 * 1_000 // EST + } +} + +/// Epoch ms of the second Sunday of March at 7:00 AM UTC (= 2:00 AM EST). +#[must_use] +pub fn march_second_sunday_utc(year: i32) -> i64 { + // March 1 day-of-week, then find second Sunday. + let mar1 = civil_to_epoch_days(year, 3, 1); + // 1970-01-01 is Thursday. (days + 3) % 7 gives 0=Mon..6=Sun. 
+ let dow = ((mar1 + 3) % 7 + 7) % 7; + let days_to_first_sunday = (6 - dow + 7) % 7; // days from Mar 1 to first Sunday + let second_sunday = mar1 + days_to_first_sunday + 7; // second Sunday + second_sunday * 86_400_000 + 7 * 3_600 * 1_000 // 7:00 AM UTC = 2:00 AM EST +} + +/// Epoch ms of the first Sunday of November at 6:00 AM UTC (= 2:00 AM EDT). +#[must_use] +pub fn november_first_sunday_utc(year: i32) -> i64 { + let nov1 = civil_to_epoch_days(year, 11, 1); + let dow = ((nov1 + 3) % 7 + 7) % 7; + let days_to_first_sunday = (6 - dow + 7) % 7; + let first_sunday = nov1 + days_to_first_sunday; + first_sunday * 86_400_000 + 6 * 3_600 * 1_000 // 6:00 AM UTC = 2:00 AM EDT +} + +/// Epoch ms of the first Sunday of April at 7:00 AM UTC (= 2:00 AM EST). +/// +/// Used for pre-2007 DST start (Uniform Time Act of 1966). +#[must_use] +pub fn april_first_sunday_utc(year: i32) -> i64 { + let apr1 = civil_to_epoch_days(year, 4, 1); + let dow = ((apr1 + 3) % 7 + 7) % 7; + let days_to_first_sunday = (6 - dow + 7) % 7; + let first_sunday = apr1 + days_to_first_sunday; + first_sunday * 86_400_000 + 7 * 3_600 * 1_000 // 7:00 AM UTC = 2:00 AM EST +} + +/// Epoch ms of the last Sunday of October at 6:00 AM UTC (= 2:00 AM EDT). +/// +/// Used for pre-2007 DST end (Uniform Time Act of 1966). +#[must_use] +pub fn october_last_sunday_utc(year: i32) -> i64 { + // Start from October 31 and walk back to find the last Sunday. + let oct31 = civil_to_epoch_days(year, 10, 31); + let dow = ((oct31 + 3) % 7 + 7) % 7; // 0=Mon..6=Sun + let days_back = (dow + 1) % 7; // days back from Oct 31 to last Sunday + let last_sunday = oct31 - days_back; + last_sunday * 86_400_000 + 6 * 3_600 * 1_000 // 6:00 AM UTC = 2:00 AM EDT +} + +/// Convert civil date to days since 1970-01-01 (inverse of the Euclidean algorithm). +// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions for valid calendar dates. 
+#[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)] +#[must_use] +pub fn civil_to_epoch_days(year: i32, month: u32, day: u32) -> i64 { + let y = if month <= 2 { + i64::from(year) - 1 + } else { + i64::from(year) + }; + let m = if month <= 2 { + i64::from(month) + 9 + } else { + i64::from(month) - 3 + }; + let era = if y >= 0 { y } else { y - 399 } / 400; + let yoe = (y - era * 400) as u64; + let doy = (153 * m as u64 + 2) / 5 + u64::from(day) - 1; + let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; + era * 146_097 + doe as i64 - 719_468 +} + +/// Convert `epoch_ms` to milliseconds-of-day in Eastern Time (DST-aware). +// Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. +#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] +#[must_use] +pub fn timestamp_to_ms_of_day(epoch_ms: u64) -> i32 { + let offset = eastern_offset_ms(epoch_ms); + let local_ms = epoch_ms as i64 + offset; + (local_ms.rem_euclid(86_400_000)) as i32 +} + +/// Convert `epoch_ms` to YYYYMMDD date integer in Eastern Time (DST-aware). +// Reason: date components fit in i32; epoch_ms is in valid market data range. 
+#[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::cast_possible_truncation +)] +#[must_use] +pub fn timestamp_to_date(epoch_ms: u64) -> i32 { + let offset = eastern_offset_ms(epoch_ms); + let local_secs = (epoch_ms as i64 + offset) / 1_000; + let days = local_secs / 86400 + 719_468; + let era = if days >= 0 { days } else { days - 146_096 } / 146_097; + let doe = (days - era * 146_097) as u32; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; + let y = i64::from(yoe) + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y as i32) * 10_000 + (m as i32) * 100 + (d as i32) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + // Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. + #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] + fn timestamp_to_ms_of_day_edt() { + // 2026-04-01 09:30:00 ET (EDT, UTC-4) = 2026-04-01 13:30:00 UTC + let epoch_ms: u64 = 1_775_050_200_000; // Apr 1 2026, 13:30 UTC + let ms = timestamp_to_ms_of_day(epoch_ms); + assert_eq!(ms, 34_200_000, "9:30 AM ET in milliseconds"); + } + + #[test] + // Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. 
+    #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
+    fn timestamp_to_ms_of_day_est() {
+        // 2026-01-15 09:30:00 ET (EST, UTC-5) = 2026-01-15 14:30:00 UTC
+        let epoch_ms: u64 = 1_768_487_400_000;
+        let ms = timestamp_to_ms_of_day(epoch_ms);
+        assert_eq!(ms, 34_200_000, "9:30 AM ET in milliseconds (winter)");
+    }
+
+    #[test]
+    fn timestamp_to_date_edt() {
+        let epoch_ms: u64 = 1_775_050_200_000; // Apr 1 2026, 13:30 UTC
+        let date = timestamp_to_date(epoch_ms);
+        assert_eq!(date, 20260401);
+    }
+
+    #[test]
+    fn timestamp_to_date_est() {
+        let epoch_ms: u64 = 1_768_487_400_000; // Jan 15 2026, 14:30 UTC
+        let date = timestamp_to_date(epoch_ms);
+        assert_eq!(date, 20260115);
+    }
+
+    #[test]
+    fn dst_transition_march_2026() {
+        // 2026 DST starts March 8 (second Sunday of March)
+        // Before: EST (UTC-5) at 06:59 UTC. After: EDT (UTC-4) at 07:01 UTC.
+        let before: u64 = 1_772_953_140_000; // Mar 8 2026, 06:59 UTC
+        assert_eq!(eastern_offset_ms(before), -5 * 3_600 * 1_000);
+        let after: u64 = 1_772_953_260_000; // Mar 8 2026, 07:01 UTC
+        assert_eq!(eastern_offset_ms(after), -4 * 3_600 * 1_000);
+    }
+
+    #[test]
+    fn pre2007_dst_summer_uses_old_rules() {
+        // 2006: old rules apply (first Sunday April -> last Sunday October).
+        // 2006-07-15 18:00:00 UTC = 2006-07-15 14:00:00 EDT (summer, mid-July).
+        // This is well within DST under both old and new rules, so EDT (UTC-4).
+        let epoch_ms: u64 = 1_152_986_400_000; // Jul 15 2006, 18:00 UTC
+        assert_eq!(
+            eastern_offset_ms(epoch_ms),
+            -4 * 3_600 * 1_000,
+            "mid-July 2006 should be EDT under old DST rules"
+        );
+    }
+
+    #[test]
+    fn pre2007_est_before_april_dst_start() {
+        // 2006: old rules — DST starts first Sunday of April (April 2, 2006).
+        // 2006-02-15 15:00:00 UTC = 2006-02-15 10:00:00 EST (winter, mid-Feb).
+ let epoch_ms: u64 = 1_140_015_600_000; // Feb 15 2006, 15:00 UTC + assert_eq!( + eastern_offset_ms(epoch_ms), + -5 * 3_600 * 1_000, + "mid-February 2006 should be EST under old DST rules" + ); + } +} diff --git a/crates/thetadatadx/Cargo.toml b/crates/thetadatadx/Cargo.toml index 1a789f26..dedbaa85 100644 --- a/crates/thetadatadx/Cargo.toml +++ b/crates/thetadatadx/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx" -version = "8.0.35" +version = "8.0.36" edition.workspace = true rust-version.workspace = true authors.workspace = true @@ -40,7 +40,7 @@ frames = ["polars", "arrow"] live-tests = [] [dependencies] -tdbe = { version = "0.12.9", path = "../tdbe" } +tdbe = { version = "0.12.10", path = "../tdbe" } # gRPC + protobuf (tonic 0.14 extracted prost codec into tonic-prost) tonic = { version = "=0.14.5", features = ["tls-ring", "tls-native-roots", "channel", "transport"] } @@ -141,7 +141,7 @@ prost-build = "=0.14.3" regex = "1.12.3" toml = "1.1.2" serde = { version = "1.0.228", features = ["derive"] } -tdbe = { version = "0.12.9", path = "../tdbe" } +tdbe = { version = "0.12.10", path = "../tdbe" } [[bench]] name = "bench_decode" diff --git a/crates/thetadatadx/build_support/ticks/templates/parser/eod_date.rs.tmpl b/crates/thetadatadx/build_support/ticks/templates/parser/eod_date.rs.tmpl index 73bb9c93..9a52eac4 100644 --- a/crates/thetadatadx/build_support/ticks/templates/parser/eod_date.rs.tmpl +++ b/crates/thetadatadx/build_support/ticks/templates/parser/eod_date.rs.tmpl @@ -6,7 +6,7 @@ match dv.data_type.as_ref() { Some(crate::proto::data_value::DataType::Number(n)) => Ok(*n as i32), Some(crate::proto::data_value::DataType::Price(p)) => Ok(p.value), - Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(crate::decode::timestamp_to_date(ts.epoch_ms)), + Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(tdbe::time::timestamp_to_date(ts.epoch_ms)), Some(crate::proto::data_value::DataType::NullValue(_)) => Ok(0), None => 
Err(DecodeError::TypeMismatch { column: idx, diff --git a/crates/thetadatadx/build_support/ticks/templates/parser/eod_num.rs.tmpl b/crates/thetadatadx/build_support/ticks/templates/parser/eod_num.rs.tmpl index 202ba1eb..d152686e 100644 --- a/crates/thetadatadx/build_support/ticks/templates/parser/eod_num.rs.tmpl +++ b/crates/thetadatadx/build_support/ticks/templates/parser/eod_num.rs.tmpl @@ -6,7 +6,7 @@ match dv.data_type.as_ref() { Some(crate::proto::data_value::DataType::Number(n)) => Ok(*n as i32), Some(crate::proto::data_value::DataType::Price(p)) => Ok(p.value), - Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(crate::decode::timestamp_to_ms_of_day(ts.epoch_ms)), + Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(tdbe::time::timestamp_to_ms_of_day(ts.epoch_ms)), Some(crate::proto::data_value::DataType::NullValue(_)) => Ok(0), None => Err(DecodeError::TypeMismatch { column: idx, diff --git a/crates/thetadatadx/build_support/ticks/templates/parser/eod_num64.rs.tmpl b/crates/thetadatadx/build_support/ticks/templates/parser/eod_num64.rs.tmpl index e5615273..f22a9a58 100644 --- a/crates/thetadatadx/build_support/ticks/templates/parser/eod_num64.rs.tmpl +++ b/crates/thetadatadx/build_support/ticks/templates/parser/eod_num64.rs.tmpl @@ -6,7 +6,7 @@ match dv.data_type.as_ref() { Some(crate::proto::data_value::DataType::Number(n)) => Ok(*n), Some(crate::proto::data_value::DataType::Price(p)) => Ok(i64::from(p.value)), - Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(i64::from(crate::decode::timestamp_to_ms_of_day(ts.epoch_ms))), + Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(i64::from(tdbe::time::timestamp_to_ms_of_day(ts.epoch_ms))), Some(crate::proto::data_value::DataType::NullValue(_)) => Ok(0), None => Err(DecodeError::TypeMismatch { column: idx, diff --git a/crates/thetadatadx/src/config.rs b/crates/thetadatadx/src/config.rs deleted file mode 100644 index 1f55ea8c..00000000 --- 
a/crates/thetadatadx/src/config.rs +++ /dev/null @@ -1,1396 +0,0 @@ -//! Server configuration for direct `ThetaData` access. -//! -//! # Server topology (from decompiled Java + `config_0.properties`) -//! -//! `ThetaData` runs two server types in their NJ datacenter: -//! -//! ## MDDS — Market Data Distribution Server (gRPC, historical data) -//! -//! The v1/v2 config listed multiple socket-level hosts: -//! ```text -//! MDDS_NJ_HOSTS=nj-a.thetadata.us:12000,nj-a.thetadata.us:12001, -//! nj-b.thetadata.us:12000,nj-b.thetadata.us:12001 -//! ``` -//! -//! But the v3 terminal uses a **single gRPC endpoint** over TLS: -//! ```text -//! mdds-01.thetadata.us:443 -//! ``` -//! -//! Source: `MddsConnectionManager` in decompiled terminal — the v3 code path -//! constructs a gRPC channel to `mdds-01.thetadata.us:443` with TLS, ignoring -//! the multi-host config entirely. -//! -//! ## FPSS — Feed Processing Streaming Server (TCP, real-time streaming) -//! -//! FPSS still uses the multi-host config with round-robin failover: -//! ```text -//! FPSS_NJ_HOSTS=nj-a.thetadata.us:20000,nj-a.thetadata.us:20001, -//! nj-b.thetadata.us:20000,nj-b.thetadata.us:20001 -//! ``` -//! -//! Source: `FpssConnectionManager` in decompiled terminal — iterates through -//! hosts on connection failure. - -use std::sync::Arc; -use std::time::Duration; - -use tdbe::types::enums::RemoveReason; - -use crate::error::Error; - -// ── Environment variable names ──────────────────────────────────────────── -// -// Two groups: -// -// * Compatibility set (`THETADATA_MDDS_HOST`, `THETADATA_MDDS_PORT`, -// `THETADATA_EMAIL`, `THETADATA_PASSWORD`) — environment variable names -// operators already use to configure existing `ThetaData` clients; -// reusing them here means an existing shell config keeps working. -// * DX extensions — cover surfaces that were previously hardcoded (Nexus -// URL, FPSS host/port, `client_type`) so site operators can steer -// traffic at a staging cluster without a code change. 
-// -// Precedence is documented on `DirectConfig`: explicit builder setter > -// env var > hardcoded default. - -/// MDDS gRPC host. -pub const ENV_MDDS_HOST: &str = "THETADATA_MDDS_HOST"; -/// MDDS gRPC port. -pub const ENV_MDDS_PORT: &str = "THETADATA_MDDS_PORT"; -/// Nexus auth base URL override. -pub const ENV_NEXUS_URL: &str = "THETADATA_NEXUS_URL"; -/// FPSS hostname override. Replaces the primary FPSS host slot; fallback -/// hosts are preserved. -pub const ENV_FPSS_HOST: &str = "THETADATA_FPSS_HOST"; -/// FPSS port override. Pairs with [`ENV_FPSS_HOST`]. -pub const ENV_FPSS_PORT: &str = "THETADATA_FPSS_PORT"; -/// `QueryInfo.client_type` override — steer server-side quotas and -/// dashboards to treat a deployment as a named fleet. -pub const ENV_CLIENT_TYPE: &str = "THETADATA_CLIENT_TYPE"; - -/// Controls FPSS reconnection behavior after a disconnect. -/// -/// # Default -/// -/// [`ReconnectPolicy::Auto`] matches the Java terminal's `handleInvoluntaryDisconnect()`: -/// permanent errors stop immediately, `TooManyRequests` waits 130s, everything else -/// waits 2s, up to 5 attempts. -/// -/// # Custom -/// -/// Supply a closure that receives the disconnect reason and attempt number (1-based) -/// and returns `Some(delay)` to reconnect after that delay, or `None` to stop. -#[derive(Clone, Default)] -pub enum ReconnectPolicy { - /// Auto-reconnect matching Java terminal behavior (default). - /// - /// - Permanent errors (invalid credentials, account issues): no reconnect. - /// - `TooManyRequests`: 130s wait. - /// - All others: 2s wait. - /// - Up to 5 consecutive reconnect attempts before giving up. - #[default] - Auto, - /// No auto-reconnect. User calls `reconnect_streaming()` manually. - Manual, - /// User-provided function: `(reason, attempt_number) -> Option`. - /// - /// Return `Some(delay)` to reconnect after `delay`, `None` to stop. - /// `attempt_number` starts at 1 and increments on each consecutive reconnect. 
- Custom(Arc Option + Send + Sync>), -} - -impl std::fmt::Debug for ReconnectPolicy { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Auto => write!(f, "Auto"), - Self::Manual => write!(f, "Manual"), - Self::Custom(_) => write!(f, "Custom(...)"), - } - } -} - -/// Controls when the FPSS write buffer is flushed. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] -pub enum FpssFlushMode { - /// Flush only on PING frames (every 100ms). Matches Java terminal. - /// Lower syscall overhead, up to 100ms additional latency. - #[default] - Batched, - /// Flush after every frame write. Lowest latency, higher syscall overhead. - Immediate, -} - -/// Exponential-backoff retry policy for transient gRPC errors on MDDS. -/// -/// Only wired on status codes `Unavailable`, `DeadlineExceeded`, and -/// `ResourceExhausted`. Permission / credential failures route through -/// the separate auto-refresh path (see the in-crate `MddsClient` wrappers) -/// and are never retried by this policy. -/// -/// # Jitter -/// -/// With `jitter = true` (default) the sleep duration follows AWS's -/// *full jitter* pattern: `delay = rand(0, min(max_delay, initial * -/// 2^attempt))`. Full jitter provably minimises retry-storm contention -/// relative to equal jitter or no jitter; see -/// . -/// -/// With `jitter = false` the delay is the deterministic backoff -/// `min(max_delay, initial * 2^attempt)`. Useful for tests that -/// need to assert exact timings. -#[derive(Debug, Clone, Copy)] -pub struct RetryPolicy { - /// Delay used for the first retry (attempt 1). Doubles per attempt. - pub initial_delay: Duration, - /// Upper bound on the computed backoff delay, regardless of attempt. - pub max_delay: Duration, - /// Total attempt budget. `1` disables retry (single call only); - /// `0` still permits the initial call but allows no retries. - pub max_attempts: u32, - /// Apply AWS-style full jitter to each retry delay. 
- pub jitter: bool, -} - -impl Default for RetryPolicy { - fn default() -> Self { - Self { - initial_delay: Duration::from_millis(250), - max_delay: Duration::from_secs(30), - max_attempts: 5, - jitter: true, - } - } -} - -impl RetryPolicy { - /// Build a policy with retry disabled — single attempt, no backoff. - #[must_use] - pub fn disabled() -> Self { - Self { - initial_delay: Duration::ZERO, - max_delay: Duration::ZERO, - max_attempts: 1, - jitter: false, - } - } - - /// Compute the sleep delay before the next retry. - /// - /// `attempt` is 1-based (attempt 1 = first retry after the initial - /// call failed). The returned duration is: - /// - /// * capped at `max_delay`, - /// * exponentiated as `initial_delay * 2^(attempt - 1)`, - /// * jittered (when `self.jitter`) across `[0, capped_delay]`. - /// - /// Overflow in `initial_delay * 2^(attempt - 1)` saturates at - /// `max_delay` rather than wrapping, so pathological `attempt` - /// values never yield a zero delay. - #[must_use] - pub fn delay_for_attempt(&self, attempt: u32) -> Duration { - let capped = self.capped_backoff(attempt); - if self.jitter { - jitter_sample(capped) - } else { - capped - } - } - - /// Deterministic capped backoff (no jitter). Exposed for tests that - /// need to assert the upper-bound envelope for a given attempt. - #[must_use] - pub fn capped_backoff(&self, attempt: u32) -> Duration { - if attempt == 0 { - return Duration::ZERO; - } - // `shift = attempt - 1` so attempt 1 = base, attempt 2 = base*2, - // attempt 3 = base*4. `u32::checked_shl(shift)` overflows - // exactly when `shift >= 32`; clamp before shifting. - let shift = (attempt - 1).min(31); - let base_nanos = self.initial_delay.as_nanos(); - let scaled_nanos = base_nanos.checked_shl(shift).unwrap_or(u128::MAX); - let max_nanos = self.max_delay.as_nanos(); - let nanos = scaled_nanos.min(max_nanos); - // `Duration::from_nanos` takes u64 — clamp rather than truncate. 
- Duration::from_nanos(u64::try_from(nanos).unwrap_or(u64::MAX)) - } -} - -/// Full-jitter sampler: uniform on `[0, ceiling]`. Uses the `Instant`- -/// derived nanosecond clock as an entropy source so we do not pull in -/// a dedicated RNG crate — sufficient for jitter randomisation where -/// the statistical quality requirement is "any non-pathological spread -/// across callers", not cryptographic randomness. -fn jitter_sample(ceiling: Duration) -> Duration { - let ceiling_nanos = ceiling.as_nanos(); - if ceiling_nanos == 0 { - return Duration::ZERO; - } - // `Instant::elapsed` inside a test might return 0 on some CI - // schedulers; folding both `elapsed` and a process-local counter - // guarantees the sampler advances even then. - use std::sync::atomic::{AtomicU64, Ordering}; - static COUNTER: AtomicU64 = AtomicU64::new(0); - let tick = COUNTER.fetch_add(1, Ordering::Relaxed); - let now_nanos = u64::try_from( - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map_or(0, |d| d.as_nanos()), - ) - .unwrap_or(u64::MAX); - // Reason: splitmix64 constants — documented mixer, fine for jitter. - let mut seed = now_nanos ^ tick.wrapping_mul(0x9E37_79B9_7F4A_7C15); - seed ^= seed >> 30; - seed = seed.wrapping_mul(0xBF58_476D_1CE4_E5B9); - seed ^= seed >> 27; - seed = seed.wrapping_mul(0x94D0_49BB_1331_11EB); - seed ^= seed >> 31; - let ceiling_u128 = ceiling_nanos; - let bounded = u128::from(seed) % (ceiling_u128 + 1); - Duration::from_nanos(u64::try_from(bounded).unwrap_or(u64::MAX)) -} - -/// Configuration for connecting to `ThetaData` servers directly. -/// -/// Use [`DirectConfig::production()`] for the standard NJ production servers. -/// -/// # Environment variable overrides -/// -/// [`DirectConfig::production()`] reads the following environment variables -/// and applies them on top of the hardcoded defaults. Explicit builder -/// setters (`.with_metrics_port(...)` etc.) 
take precedence over env vars, -/// which in turn take precedence over the hardcoded defaults. -/// -/// | Variable | Type | Effect | -/// |---|---|---| -/// | `THETADATA_MDDS_HOST` | host | overrides `mdds_host` | -/// | `THETADATA_MDDS_PORT` | u16 | overrides `mdds_port` | -/// | `THETADATA_NEXUS_URL` | url | overrides the Nexus auth URL | -/// | `THETADATA_FPSS_HOST` | host | overrides the primary FPSS host | -/// | `THETADATA_FPSS_PORT` | u16 | overrides the primary FPSS port | -/// | `THETADATA_CLIENT_TYPE` | str | overrides `QueryInfo.client_type` | -/// | `THETADATA_EMAIL` | str | credential helper ([`crate::auth`]) | -/// | `THETADATA_PASSWORD` | str | credential helper ([`crate::auth`]) | -/// -/// Malformed values (e.g. a non-integer `THETADATA_MDDS_PORT`) are ignored -/// with a `tracing::warn!` — the hardcoded default is retained so a typo -/// in the environment never silently breaks production. -#[derive(Debug, Clone)] -pub struct DirectConfig { - // -- MDDS (gRPC) -- - /// MDDS gRPC hostname. - /// - /// Source: `MddsConnectionManager` in decompiled terminal (v3 path). - pub mdds_host: String, - - /// MDDS gRPC port (443 for TLS in production). - pub mdds_port: u16, - - /// Whether to use TLS for the MDDS gRPC connection. - /// Always `true` in production (standard gRPC-over-TLS on port 443). - pub mdds_tls: bool, - - // -- FPSS (TCP) -- - /// FPSS TCP hosts with round-robin failover. - /// - /// Source: `FPSS_NJ_HOSTS` in `config_0.properties` — the terminal - /// iterates through these on connection failure. - pub fpss_hosts: Vec<(String, u16)>, - - // -- FPSS tuning -- - /// FPSS connection/read timeout in milliseconds. - /// - /// Source: `FPSS_TIMEOUT=10000` in `config_0.properties`. - pub fpss_timeout_ms: u64, - - /// FPSS event channel buffer depth. - /// Caller should pass this to `FpssClient::connect(creds, fpss_queue_depth)`. - /// Increase if stream events are being dropped under high volume. 
- /// - /// JVM equivalent: `FPSS_QUEUE_DEPTH=1000000` in `config_0.properties`. - /// - /// NOTE: Not automatically wired — caller must pass to `FpssClient::connect()`. - pub fpss_queue_depth: usize, - - /// FPSS disruptor ring buffer size (slots, will be rounded up to a power of 2). - /// - /// The LMAX Disruptor ring buffer used for lock-free event dispatch requires - /// a power-of-2 size. This value is rounded up automatically. Larger rings - /// absorb more burst traffic but use more memory (~`ring_size * sizeof(Option)`). - /// - /// Derived from `fpss_queue_depth` by default. Override for fine-grained control. - pub fpss_ring_size: usize, - - /// FPSS heartbeat ping interval in milliseconds. - /// The protocol requires pings every 100ms; changing this may cause disconnects. - /// - /// Source: `FPSSClient.startPinging()` — timer period = 100ms. - /// - /// NOTE: Not automatically wired — the ping loop uses `protocol::PING_INTERVAL_MS`. - /// Override that constant or pass this value when a configurable ping loop is added. - pub fpss_ping_interval_ms: u64, - - /// Per-server TCP connect timeout in milliseconds. - /// - /// Source: `FPSSClient` — `socket.connect(addr, 2000)`. - /// - /// NOTE: Not automatically wired — the connection module uses `protocol::CONNECT_TIMEOUT_MS`. - /// Override that constant or pass this value when a configurable connect is added. - pub fpss_connect_timeout_ms: u64, - - /// Controls when the FPSS write buffer is flushed. - /// - /// - [`FpssFlushMode::Batched`] (default): only flush on PING frames (~100ms), - /// matching the Java terminal. Lower syscall overhead. - /// - [`FpssFlushMode::Immediate`]: flush after every frame write. Lowest - /// latency, higher syscall overhead. - pub fpss_flush_mode: FpssFlushMode, - - // -- MDDS tuning -- - /// Max concurrent in-flight gRPC requests. - /// - /// JVM equivalent: `2^subscription_tier` (Free=1, Value=2, Standard=4, Pro=8). 
- /// Set to 0 to auto-detect from the subscription tier returned by Nexus auth. - pub mdds_concurrent_requests: usize, - - /// Max inbound gRPC message size in bytes. - /// - /// JVM equivalent: `maxInboundMessageSize(0x100000 * config.messageSize())`, - /// default 4MB, max 10MB. - pub mdds_max_message_size: usize, - - /// gRPC keepalive interval in seconds. - /// - /// Source: `ChannelProvider` — `keepAliveTime(30, SECONDS)`. - pub mdds_keepalive_secs: u64, - - /// gRPC keepalive timeout in seconds. - /// - /// Source: `ChannelProvider` — `keepAliveTimeout(10, SECONDS)`. - pub mdds_keepalive_timeout_secs: u64, - - /// gRPC flow control: initial stream window size in KB. - /// - /// Maps to `tonic::transport::Endpoint::initial_stream_window_size`. - /// Default 64 KB matches HTTP/2 spec default. - pub mdds_window_size_kb: usize, - - /// gRPC flow control: initial connection window size in KB. - /// - /// Maps to `tonic::transport::Endpoint::initial_connection_window_size`. - /// Default 64 KB. Increase for high-throughput bulk queries. - pub mdds_connection_window_size_kb: usize, - - // -- Reconnection -- - /// Delay before attempting reconnection after a disconnect, in milliseconds. - /// - /// Source: `FPSSClient.RECONNECT_DELAY_MS = 2000` in decompiled terminal. - /// Note: `config_0.properties` has `RECONNECT_WAIT=1000` but the Java code - /// uses the constant `2000` at runtime. - /// - /// NOTE: Not automatically wired — consumed by - /// [`crate::ThetaDataDx::reconnect_streaming`] / the FPSS auto-reconnect path. - pub reconnect_wait_ms: u64, - - /// Delay before reconnecting after a `TooManyRequests` disconnect, in milliseconds. - /// - /// Source: `FPSSClient.handleInvoluntaryDisconnect()` — 130 second wait. - /// - /// NOTE: Not automatically wired — consumed by - /// [`crate::ThetaDataDx::reconnect_streaming`] / the FPSS auto-reconnect path. 
- pub reconnect_wait_rate_limited_ms: u64, - - // -- Reconnection policy -- - /// Controls FPSS auto-reconnection behavior after involuntary disconnect. - /// - /// Default: [`ReconnectPolicy::Auto`] — matches Java terminal behavior. - pub reconnect_policy: ReconnectPolicy, - - // -- OHLCVC derivation -- - /// Whether to derive OHLCVC bars locally from trade events. - /// - /// When `true` (default), the FPSS client emits derived `FpssData::Ohlcvc` - /// events after each trade. When `false`, only server-sent OHLCVC frames - /// (wire code 24) are emitted, reducing per-trade throughput overhead. - /// - /// The Java terminal always derives OHLCVC with no way to disable it. - pub derive_ohlcvc: bool, - - // -- Threading -- - /// Number of tokio worker threads. `None` = tokio default (number of CPU cores). - /// - /// JVM equivalent: `-Xmx` + `HTTP_CONCURRENCY` thread pool sizing. - /// - /// NOTE: Not automatically wired — caller should use this when building - /// a custom tokio runtime. - pub tokio_worker_threads: Option, - - // -- Retry / reliability -- - /// Exponential-backoff retry policy for transient gRPC errors on MDDS. - /// See [`RetryPolicy`] for defaults and the list of retried status codes. - pub retry_policy: RetryPolicy, - - // -- Endpoints overridable via env -- - /// Nexus auth URL. Default matches the upstream production endpoint; set - /// [`ENV_NEXUS_URL`] to redirect at a staging cluster. - pub nexus_url: String, - - /// Value used for `QueryInfo.client_type`. Defaults to `"rust-thetadatadx"`; - /// override via [`ENV_CLIENT_TYPE`] to identify a deployment fleet in - /// server-side dashboards. - pub client_type: String, - - // -- Observability -- - /// Port the Prometheus exporter binds to when the `metrics-prometheus` - /// cargo feature is enabled. 
`None` disables the exporter even when the - /// feature is compiled in; `Some(port)` starts an HTTP listener on - /// `0.0.0.0:` whose `/metrics` endpoint exposes every counter - /// and histogram recorded through the `metrics` crate. - pub metrics_port: Option, -} - -impl DirectConfig { - /// Default Nexus auth URL (matches the upstream production endpoint). - pub const DEFAULT_NEXUS_URL: &'static str = - "https://nexus-api.thetadata.us/identity/terminal/auth_user"; - - /// Default `QueryInfo.client_type`. - pub const DEFAULT_CLIENT_TYPE: &'static str = "rust-thetadatadx"; - - /// Production configuration for `ThetaData`'s NJ datacenter. - /// - /// All values extracted from the decompiled Java terminal: - /// - MDDS: `mdds-01.thetadata.us:443` (gRPC over TLS) - /// - FPSS: 4 hosts from `config_0.properties` `FPSS_NJ_HOSTS` - /// - Timeouts: from `config_0.properties` - /// - /// Environment variables listed on [`DirectConfig`] are layered on - /// top of these defaults. - #[must_use] - pub fn production() -> Self { - let mut config = Self::production_defaults(); - config.apply_env_overrides(); - config.validate() - } - - /// Production defaults without env-var overrides. Tests use this to - /// assert the hardcoded shape in isolation; every caller that wants - /// env-var precedence should reach for [`DirectConfig::production`]. 
- #[must_use] - pub(crate) fn production_defaults() -> Self { - Self { - // Source: MddsConnectionManager (v3 gRPC path) - mdds_host: "mdds-01.thetadata.us".to_string(), - mdds_port: 443, - mdds_tls: true, - - // Source: config_0.properties FPSS_NJ_HOSTS - fpss_hosts: vec![ - ("nj-a.thetadata.us".to_string(), 20000), - ("nj-a.thetadata.us".to_string(), 20001), - ("nj-b.thetadata.us".to_string(), 20000), - ("nj-b.thetadata.us".to_string(), 20001), - ], - - // Source: config_0.properties - fpss_timeout_ms: 10_000, - fpss_queue_depth: 1_000_000, // FPSS_QUEUE_DEPTH - fpss_ring_size: 131_072, // 2^17, covers ~13s at 10k events/sec - fpss_ping_interval_ms: 100, // FPSSClient.startPinging() - fpss_connect_timeout_ms: 2_000, // FPSSClient socket.connect timeout - fpss_flush_mode: FpssFlushMode::Batched, - - // Concurrency: 0 = auto-detect from subscription tier at auth time. - mdds_concurrent_requests: 0, - - // Source: ChannelProvider in decompiled terminal - mdds_max_message_size: 4 * 1024 * 1024, // 4MB default - mdds_keepalive_secs: 30, - mdds_keepalive_timeout_secs: 10, - - // gRPC flow control (HTTP/2 spec defaults) - mdds_window_size_kb: 64, - mdds_connection_window_size_kb: 64, - - // Source: FPSSClient.RECONNECT_DELAY_MS = 2000 in decompiled terminal - reconnect_wait_ms: 2_000, - reconnect_wait_rate_limited_ms: 130_000, // FPSSClient: 130s for TooManyRequests - - // Auto-reconnect matches Java terminal behavior by default - reconnect_policy: ReconnectPolicy::Auto, - - // Derive OHLCVC from trades by default (matches Java terminal) - derive_ohlcvc: true, - - // Default: use all CPU cores - tokio_worker_threads: None, - - retry_policy: RetryPolicy::default(), - nexus_url: Self::DEFAULT_NEXUS_URL.to_string(), - client_type: Self::DEFAULT_CLIENT_TYPE.to_string(), - metrics_port: None, - } - } - - /// Apply the documented [`DirectConfig`] env-var matrix on top of the - /// receiver. 
Unknown / malformed values are logged and skipped so a - /// typo never silently flips production to the wrong endpoint. - fn apply_env_overrides(&mut self) { - if let Ok(host) = std::env::var(ENV_MDDS_HOST) { - let trimmed = host.trim(); - if !trimmed.is_empty() { - self.mdds_host = trimmed.to_string(); - } - } - if let Ok(port_str) = std::env::var(ENV_MDDS_PORT) { - match port_str.trim().parse::() { - Ok(port) if port > 0 => self.mdds_port = port, - _ => tracing::warn!( - env = ENV_MDDS_PORT, - value = %port_str, - "ignoring malformed env var; keeping hardcoded default" - ), - } - } - if let Ok(url) = std::env::var(ENV_NEXUS_URL) { - let trimmed = url.trim(); - if !trimmed.is_empty() { - self.nexus_url = trimmed.to_string(); - } - } - if let Ok(client_type) = std::env::var(ENV_CLIENT_TYPE) { - let trimmed = client_type.trim(); - if !trimmed.is_empty() { - self.client_type = trimmed.to_string(); - } - } - // FPSS host/port are mirrored as a (host, port) tuple in the - // primary slot. If only one of the pair is set we keep the - // default for the other half rather than guessing. - let env_host = std::env::var(ENV_FPSS_HOST).ok(); - let env_port = std::env::var(ENV_FPSS_PORT).ok(); - if env_host.is_some() || env_port.is_some() { - if self.fpss_hosts.is_empty() { - // Empty defaults would mean "no primary to override". - // Skip silently — production_defaults seeds 4 hosts, so - // this only fires for hand-built configs. 
- tracing::warn!( - "ignoring THETADATA_FPSS_HOST / THETADATA_FPSS_PORT; \ - DirectConfig has no FPSS hosts to override" - ); - } else { - let (default_host, default_port) = self.fpss_hosts[0].clone(); - let host = env_host - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map_or(default_host, str::to_string); - let port = env_port - .as_deref() - .and_then(|raw| match raw.trim().parse::() { - Ok(p) if p > 0 => Some(p), - _ => { - tracing::warn!( - env = ENV_FPSS_PORT, - value = %raw, - "ignoring malformed env var; keeping hardcoded default" - ); - None - } - }) - .unwrap_or(default_port); - self.fpss_hosts[0] = (host, port); - } - } - } - - /// Dev FPSS configuration. - /// - /// Connects to `ThetaData`'s dev FPSS servers (port 20200) which replay - /// a random historical trading day in an infinite loop at maximum speed. - /// Designed for development and testing when markets are closed. - /// - /// MDDS (historical) still uses production servers -- there is no dev MDDS. - /// - /// Source: `config.toml` `fpss_dev_hosts` and - /// - /// - /// Note: dev server replays data at max speed, so queue and ring sizes - /// match production to avoid drops. Some contracts may not exist on - /// the replayed day. - #[must_use] - pub fn dev() -> Self { - let mut config = Self::production(); - // Source: config.toml fpss_dev_hosts - config.fpss_hosts = vec![ - ("nj-a.thetadata.us".to_string(), 20200), - ("test-server.thetadata.us".to_string(), 20200), - ("test-server.thetadata.us".to_string(), 20201), - ]; - config.validate() - } - - /// Stage FPSS configuration. - /// - /// Connects to `ThetaData`'s staging FPSS servers (port 20100). - /// Frequent reboots, testing data. Not stable. - /// - /// MDDS (historical) still uses production servers. 
- /// - /// Source: `config.toml` `fpss_stage_hosts` - #[must_use] - pub fn stage() -> Self { - let mut config = Self::production(); - // Source: config.toml fpss_stage_hosts - config.fpss_hosts = vec![ - ("nj-a.thetadata.us".to_string(), 20100), - ("test-server.thetadata.us".to_string(), 20100), - ("test-server.thetadata.us".to_string(), 20101), - ]; - config.validate() - } - - /// Validate configuration values and clamp out-of-range fields, logging - /// a warning for each clamped value. - /// - /// Called automatically by [`production()`](Self::production), - /// [`dev()`](Self::dev), and [`stage()`](Self::stage). Also useful after - /// loading from a TOML file or modifying fields programmatically. - #[must_use] - pub fn validate(mut self) -> Self { - self.fpss_queue_depth = self.fpss_queue_depth.clamp(16, 1_000_000); - self.mdds_window_size_kb = self.mdds_window_size_kb.clamp(64, 1_024); - self.mdds_connection_window_size_kb = self.mdds_connection_window_size_kb.clamp(64, 1_024); - self - } - - /// Build the MDDS gRPC endpoint URI. - /// - /// Returns a URI suitable for `tonic::transport::Channel::from_static()`. - #[must_use] - pub fn mdds_uri(&self) -> String { - let scheme = if self.mdds_tls { "https" } else { "http" }; - format!("{}://{}:{}", scheme, self.mdds_host, self.mdds_port) - } - - /// Set whether to derive OHLCVC bars locally from trade events. - /// - /// When `false`, only server-sent OHLCVC frames are emitted, - /// reducing per-trade throughput overhead. - #[must_use] - pub fn derive_ohlcvc(mut self, enabled: bool) -> Self { - self.derive_ohlcvc = enabled; - self - } - - /// Set the port the Prometheus exporter should bind to when the - /// `metrics-prometheus` cargo feature is enabled. The exporter - /// exposes `/metrics` over HTTP on `0.0.0.0:`. - #[must_use] - pub fn with_metrics_port(mut self, port: u16) -> Self { - self.metrics_port = Some(port); - self - } - - /// Override the retry policy for transient gRPC errors. 
- #[must_use] - pub fn with_retry_policy(mut self, policy: RetryPolicy) -> Self { - self.retry_policy = policy; - self - } - - /// Override the Nexus auth URL. Intended for staging deployments — - /// production should use [`ENV_NEXUS_URL`] or the default. - #[must_use] - pub fn with_nexus_url(mut self, url: impl Into) -> Self { - self.nexus_url = url.into(); - self - } - - /// Override `QueryInfo.client_type`. Appears in server-side logs - /// and dashboards; useful for tagging a deployment fleet. - #[must_use] - pub fn with_client_type(mut self, client_type: impl Into) -> Self { - self.client_type = client_type.into(); - self - } - - /// Parse FPSS hosts from a comma-separated `host:port,host:port,...` string. - /// - /// This is the format used in `config_0.properties` for `FPSS_NJ_HOSTS`. - /// # Errors - /// - /// Returns an error on network, authentication, or parsing failure. - pub fn parse_fpss_hosts(hosts_str: &str) -> Result, Error> { - let mut result = Vec::new(); - - for entry in hosts_str.split(',') { - let entry = entry.trim(); - if entry.is_empty() { - continue; - } - - let (host, port_str) = entry - .rsplit_once(':') - .ok_or_else(|| Error::Config(format!("invalid host:port entry: '{entry}'")))?; - - let port: u16 = port_str - .parse() - .map_err(|e| Error::Config(format!("invalid port in '{entry}': {e}")))?; - - result.push((host.to_string(), port)); - } - - if result.is_empty() { - return Err(Error::Config("no FPSS hosts provided".to_string())); - } - - Ok(result) - } -} - -// ── Config file loading (behind `config-file` feature) ────────────────────── - -#[cfg(feature = "config-file")] -mod config_file { - use super::{DirectConfig, FpssFlushMode}; - use crate::error::Error; - use serde::Deserialize; - - /// TOML-level representation of the config file. - /// - /// Unknown keys are silently ignored (`#[serde(default)]` on each section). - /// Missing sections fall back to production defaults. 
- #[derive(Debug, Default, Deserialize)] - #[serde(default)] - struct ConfigFile { - mdds: MddsSection, - fpss: FpssSection, - grpc: GrpcSection, - auth: AuthSection, - } - - #[derive(Debug, Deserialize)] - #[serde(default)] - struct MddsSection { - host: String, - port: u16, - tls: bool, - keepalive_time_secs: u64, - keepalive_timeout_secs: u64, - max_message_size: usize, - } - - impl Default for MddsSection { - fn default() -> Self { - let prod = DirectConfig::production(); - Self { - host: prod.mdds_host, - port: prod.mdds_port, - tls: prod.mdds_tls, - keepalive_time_secs: prod.mdds_keepalive_secs, - keepalive_timeout_secs: prod.mdds_keepalive_timeout_secs, - max_message_size: prod.mdds_max_message_size, - } - } - } - - #[derive(Debug, Deserialize)] - #[serde(default)] - struct FpssSection { - /// Hosts as `["host:port", ...]` array or `"host:port,host:port"` string. - hosts: FpssHosts, - connect_timeout: u64, - read_timeout: u64, - ping_interval: u64, - reconnect_wait: u64, - reconnect_wait_rate_limited: u64, - queue_depth: usize, - ring_size: usize, - flush_mode: String, - } - - impl Default for FpssSection { - fn default() -> Self { - let prod = DirectConfig::production(); - Self { - hosts: FpssHosts::Array( - prod.fpss_hosts - .iter() - .map(|(h, p)| format!("{h}:{p}")) - .collect(), - ), - connect_timeout: prod.fpss_connect_timeout_ms, - read_timeout: prod.fpss_timeout_ms, - ping_interval: prod.fpss_ping_interval_ms, - reconnect_wait: prod.reconnect_wait_ms, - reconnect_wait_rate_limited: prod.reconnect_wait_rate_limited_ms, - queue_depth: prod.fpss_queue_depth, - ring_size: prod.fpss_ring_size, - flush_mode: "batched".to_string(), - } - } - } - - /// FPSS hosts can be specified as either a TOML array or a comma-separated string. 
- #[derive(Debug, Deserialize)] - #[serde(untagged)] - enum FpssHosts { - Array(Vec), - Csv(String), - } - - impl Default for FpssHosts { - fn default() -> Self { - let prod = DirectConfig::production(); - FpssHosts::Array( - prod.fpss_hosts - .iter() - .map(|(h, p)| format!("{h}:{p}")) - .collect(), - ) - } - } - - #[derive(Debug, Deserialize)] - #[serde(default)] - struct GrpcSection { - window_size_kb: usize, - connection_window_size_kb: usize, - max_message_size_mb: usize, - concurrent_requests: usize, - } - - impl Default for GrpcSection { - fn default() -> Self { - let prod = DirectConfig::production(); - Self { - window_size_kb: prod.mdds_window_size_kb, - connection_window_size_kb: prod.mdds_connection_window_size_kb, - max_message_size_mb: prod.mdds_max_message_size / (1024 * 1024), - concurrent_requests: prod.mdds_concurrent_requests, - } - } - } - - #[derive(Debug, Default, Deserialize)] - #[serde(default)] - struct AuthSection { - #[serde(rename = "creds_file")] - _creds_file: Option, - } - - impl FpssHosts { - fn parse(self) -> Result, Error> { - let entries = match self { - FpssHosts::Array(arr) => arr, - FpssHosts::Csv(s) => s.split(',').map(|s| s.trim().to_string()).collect(), - }; - let mut result = Vec::new(); - for entry in entries { - let entry = entry.trim(); - if entry.is_empty() { - continue; - } - let (host, port_str) = entry - .rsplit_once(':') - .ok_or_else(|| Error::Config(format!("invalid host:port entry: '{entry}'")))?; - let port: u16 = port_str - .parse() - .map_err(|e| Error::Config(format!("invalid port in '{entry}': {e}")))?; - result.push((host.to_string(), port)); - } - if result.is_empty() { - return Err(Error::Config("no FPSS hosts provided".to_string())); - } - Ok(result) - } - } - - impl DirectConfig { - /// Load configuration from a TOML file. - /// - /// The file format matches `config.default.toml` shipped with the crate. - /// Missing sections and keys fall back to [`DirectConfig::production()`] defaults. 
- /// Unknown keys are silently ignored. - /// - /// # Example file - /// - /// ```toml - /// [mdds] - /// host = "mdds-01.thetadata.us" - /// port = 443 - /// tls = true - /// - /// [fpss] - /// hosts = ["nj-a.thetadata.us:20000", "nj-b.thetadata.us:20000"] - /// reconnect_wait = 2000 - /// queue_depth = 1_000_000 - /// flush_mode = "batched" # or "immediate" - /// - /// [grpc] - /// window_size_kb = 64 - /// connection_window_size_kb = 64 - /// concurrent_requests = 0 # 0 = auto from tier - /// ``` - /// # Errors - /// - /// Returns an error on network, authentication, or parsing failure. - pub fn from_file(path: impl AsRef) -> Result { - let contents = std::fs::read_to_string(path.as_ref()).map_err(|e| { - Error::Config(format!( - "failed to read config file '{}': {e}", - path.as_ref().display() - )) - })?; - Self::from_toml_str(&contents) - } - - /// Parse configuration from a TOML string. - /// - /// Same semantics as [`from_file`](Self::from_file) but takes a string directly. - /// # Errors - /// - /// Returns an error on network, authentication, or parsing failure. - pub fn from_toml_str(toml_str: &str) -> Result { - let cf: ConfigFile = toml::from_str(toml_str) - .map_err(|e| Error::Config(format!("failed to parse TOML config: {e}")))?; - - let flush_mode = match cf.fpss.flush_mode.to_lowercase().as_str() { - "immediate" => FpssFlushMode::Immediate, - _ => FpssFlushMode::Batched, - }; - - // If [grpc].max_message_size_mb is set, it overrides [mdds].max_message_size. - // The grpc section value is in MB; the mdds section value is in bytes. 
- let max_message_size = if cf.grpc.max_message_size_mb - != DirectConfig::production().mdds_max_message_size / (1024 * 1024) - { - cf.grpc.max_message_size_mb * 1024 * 1024 - } else { - cf.mdds.max_message_size - }; - - Ok(DirectConfig { - mdds_host: cf.mdds.host, - mdds_port: cf.mdds.port, - mdds_tls: cf.mdds.tls, - - fpss_hosts: cf.fpss.hosts.parse()?, - fpss_timeout_ms: cf.fpss.read_timeout, - fpss_queue_depth: cf.fpss.queue_depth, - fpss_ring_size: cf.fpss.ring_size, - fpss_ping_interval_ms: cf.fpss.ping_interval, - fpss_connect_timeout_ms: cf.fpss.connect_timeout, - fpss_flush_mode: flush_mode, - - mdds_concurrent_requests: cf.grpc.concurrent_requests, - mdds_max_message_size: max_message_size, - mdds_keepalive_secs: cf.mdds.keepalive_time_secs, - mdds_keepalive_timeout_secs: cf.mdds.keepalive_timeout_secs, - mdds_window_size_kb: cf.grpc.window_size_kb, - mdds_connection_window_size_kb: cf.grpc.connection_window_size_kb, - - reconnect_wait_ms: cf.fpss.reconnect_wait, - reconnect_wait_rate_limited_ms: cf.fpss.reconnect_wait_rate_limited, - - // TOML config cannot express custom closures; default to Auto. - // Use the builder API to set Manual or Custom programmatically. - reconnect_policy: super::ReconnectPolicy::Auto, - - // Default: derive OHLCVC from trades (matches production default). - // Use the builder API to disable programmatically. - derive_ohlcvc: true, - - tokio_worker_threads: None, - - // TOML does not surface RetryPolicy / observability fields - // today — the builder API (`with_retry_policy`, - // `with_metrics_port`, env vars) is the opt-in path. 
- retry_policy: super::RetryPolicy::default(), - nexus_url: DirectConfig::DEFAULT_NEXUS_URL.to_string(), - client_type: DirectConfig::DEFAULT_CLIENT_TYPE.to_string(), - metrics_port: None, - } - .validate()) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn production_mdds_uri() { - let config = DirectConfig::production(); - assert_eq!(config.mdds_uri(), "https://mdds-01.thetadata.us:443"); - } - - #[test] - fn production_has_four_fpss_hosts() { - let config = DirectConfig::production(); - assert_eq!(config.fpss_hosts.len(), 4); - } - - #[test] - fn production_default_reconnect_policy_is_auto() { - let config = DirectConfig::production(); - assert!(matches!(config.reconnect_policy, ReconnectPolicy::Auto)); - } - - #[test] - fn parse_fpss_hosts_parses_multi_host_csv_with_whitespace_and_empty_entries() { - let hosts = - DirectConfig::parse_fpss_hosts(" nj-a.thetadata.us:20000, ,nj-b.thetadata.us:20001 ") - .unwrap(); - assert_eq!(hosts.len(), 2); - assert_eq!(hosts[0], ("nj-a.thetadata.us".to_string(), 20000)); - assert_eq!(hosts[1], ("nj-b.thetadata.us".to_string(), 20001)); - } - - #[test] - fn parse_fpss_hosts_rejects_malformed_entries() { - assert!(DirectConfig::parse_fpss_hosts("").is_err()); - assert!(DirectConfig::parse_fpss_hosts("host:notaport").is_err()); - assert!(DirectConfig::parse_fpss_hosts("hostonly").is_err()); - } - - // -- Config file tests (only compiled with the `config-file` feature) -- - - #[cfg(feature = "config-file")] - mod config_file_tests { - use crate::config::{DirectConfig, FpssFlushMode}; - - #[test] - fn empty_toml_gives_production_defaults() { - let config = DirectConfig::from_toml_str("").unwrap(); - let prod = DirectConfig::production(); - assert_eq!(config.mdds_host, prod.mdds_host); - assert_eq!(config.mdds_port, prod.mdds_port); - assert_eq!(config.fpss_hosts.len(), prod.fpss_hosts.len()); - assert_eq!(config.fpss_queue_depth, prod.fpss_queue_depth); - } - - #[test] - fn 
partial_toml_overrides_only_specified() { - let toml = r#" - [mdds] - host = "custom.example.com" - port = 8443 - - [fpss] - queue_depth = 500000 - "#; - let config = DirectConfig::from_toml_str(toml).unwrap(); - assert_eq!(config.mdds_host, "custom.example.com"); - assert_eq!(config.mdds_port, 8443); - assert_eq!(config.fpss_queue_depth, 500000); - // Unspecified fields keep production defaults - assert!(config.mdds_tls); - } - - #[test] - fn fpss_hosts_as_array() { - let toml = r#" - [fpss] - hosts = ["host-a.example.com:20000", "host-b.example.com:20001"] - "#; - let config = DirectConfig::from_toml_str(toml).unwrap(); - assert_eq!(config.fpss_hosts.len(), 2); - assert_eq!( - config.fpss_hosts[0], - ("host-a.example.com".to_string(), 20000) - ); - assert_eq!( - config.fpss_hosts[1], - ("host-b.example.com".to_string(), 20001) - ); - } - - #[test] - fn fpss_hosts_as_csv_string() { - let toml = r#" - [fpss] - hosts = "host-a.example.com:20000,host-b.example.com:20001" - "#; - let config = DirectConfig::from_toml_str(toml).unwrap(); - assert_eq!(config.fpss_hosts.len(), 2); - assert_eq!(config.fpss_hosts[0].0, "host-a.example.com"); - } - - #[test] - fn flush_mode_immediate() { - let toml = r#" - [fpss] - flush_mode = "immediate" - "#; - let config = DirectConfig::from_toml_str(toml).unwrap(); - assert_eq!(config.fpss_flush_mode, FpssFlushMode::Immediate); - } - - #[test] - fn flush_mode_batched_by_default() { - let toml = r#" - [fpss] - flush_mode = "batched" - "#; - let config = DirectConfig::from_toml_str(toml).unwrap(); - assert_eq!(config.fpss_flush_mode, FpssFlushMode::Batched); - } - - #[test] - fn grpc_section_sets_window_sizes() { - let toml = r#" - [grpc] - window_size_kb = 128 - connection_window_size_kb = 256 - concurrent_requests = 4 - "#; - let config = DirectConfig::from_toml_str(toml).unwrap(); - assert_eq!(config.mdds_window_size_kb, 128); - assert_eq!(config.mdds_connection_window_size_kb, 256); - assert_eq!(config.mdds_concurrent_requests, 4); - 
} - - #[test] - fn grpc_max_message_size_mb_overrides_mdds_bytes() { - let toml = r#" - [grpc] - max_message_size_mb = 8 - "#; - let config = DirectConfig::from_toml_str(toml).unwrap(); - assert_eq!(config.mdds_max_message_size, 8 * 1024 * 1024); - } - - #[test] - fn unknown_keys_are_ignored() { - let toml = r#" - [mdds] - host = "mdds-01.thetadata.us" - port = 443 - unknown_key = "should be ignored" - - [some_unknown_section] - foo = "bar" - "#; - // Should not error - let config = DirectConfig::from_toml_str(toml).unwrap(); - assert_eq!(config.mdds_port, 443); - } - - #[test] - fn full_config_default_toml_parses() { - // Validate that config.default.toml (shipped with the crate) can be parsed. - let default_toml = include_str!("../../../config.default.toml"); - let config = DirectConfig::from_toml_str(default_toml).unwrap(); - assert_eq!(config.mdds_host, "mdds-01.thetadata.us"); - assert_eq!(config.mdds_port, 443); - assert_eq!(config.fpss_hosts.len(), 4); - } - - #[test] - fn invalid_toml_returns_error() { - let result = DirectConfig::from_toml_str("this is not valid toml [[["); - assert!(result.is_err()); - assert!(result.unwrap_err().to_string().contains("TOML")); - } - } - - // -- Validation tests -- - - #[test] - fn validate_clamps_out_of_range_values() { - let mut config = DirectConfig::production_defaults(); - config.fpss_queue_depth = 5; - config.mdds_window_size_kb = 2_048; - let config = config.validate(); - assert_eq!(config.fpss_queue_depth, 16); - assert_eq!(config.mdds_window_size_kb, 1_024); - } - - #[test] - fn validate_preserves_in_range_values() { - let config = DirectConfig::production_defaults(); - let validated = config.validate(); - assert_eq!(validated.fpss_queue_depth, 1_000_000); - assert_eq!(validated.mdds_window_size_kb, 64); - } - - // ── RetryPolicy / env var tests ────────────────────────────────── - - #[test] - fn retry_policy_default_shape_is_stable() { - let p = RetryPolicy::default(); - assert_eq!(p.initial_delay, 
Duration::from_millis(250)); - assert_eq!(p.max_delay, Duration::from_secs(30)); - assert_eq!(p.max_attempts, 5); - assert!(p.jitter); - } - - #[test] - fn retry_policy_capped_backoff_doubles_each_attempt_then_caps() { - let p = RetryPolicy { - initial_delay: Duration::from_millis(100), - max_delay: Duration::from_millis(800), - max_attempts: 10, - jitter: false, - }; - assert_eq!(p.capped_backoff(0), Duration::ZERO); - assert_eq!(p.capped_backoff(1), Duration::from_millis(100)); - assert_eq!(p.capped_backoff(2), Duration::from_millis(200)); - assert_eq!(p.capped_backoff(3), Duration::from_millis(400)); - assert_eq!(p.capped_backoff(4), Duration::from_millis(800)); - // Saturates at max_delay; never exceeds the cap even on absurd attempt counts. - assert_eq!(p.capped_backoff(5), Duration::from_millis(800)); - assert_eq!(p.capped_backoff(60), Duration::from_millis(800)); - } - - #[test] - fn retry_policy_delay_for_attempt_respects_jitter_upper_bound() { - let p = RetryPolicy { - initial_delay: Duration::from_millis(100), - max_delay: Duration::from_millis(1_000), - max_attempts: 10, - jitter: true, - }; - // Full-jitter envelope: sample ∈ [0, capped_backoff(attempt)]. - // Exercise 200 draws per attempt to shake out off-by-one issues - // without making the test flaky — every sample must land in - // the closed interval above. - for attempt in 1..=6u32 { - let ceiling = p.capped_backoff(attempt); - for _ in 0..200 { - let delay = p.delay_for_attempt(attempt); - assert!( - delay <= ceiling, - "attempt {attempt}: delay {delay:?} exceeded ceiling {ceiling:?}" - ); - } - } - } - - #[test] - fn retry_policy_delay_for_attempt_deterministic_without_jitter() { - let p = RetryPolicy { - initial_delay: Duration::from_millis(50), - max_delay: Duration::from_millis(400), - max_attempts: 5, - jitter: false, - }; - // No jitter → every draw equals the capped backoff envelope. 
- for attempt in 1..=4u32 { - let expected = p.capped_backoff(attempt); - for _ in 0..16 { - assert_eq!(p.delay_for_attempt(attempt), expected); - } - } - } - - #[test] - fn retry_policy_disabled_yields_single_attempt() { - let p = RetryPolicy::disabled(); - assert_eq!(p.max_attempts, 1); - assert_eq!(p.delay_for_attempt(1), Duration::ZERO); - assert!(!p.jitter); - } - - // `std::env` is a process-global singleton; the env-var tests use a - // single mutex so they don't trample each other under - // `cargo test -- --test-threads=N`. Each test keeps hold of the - // guard for the duration of the config build + assertions. - fn env_test_guard() -> std::sync::MutexGuard<'static, ()> { - use std::sync::{Mutex, OnceLock}; - static LOCK: OnceLock> = OnceLock::new(); - LOCK.get_or_init(|| Mutex::new(())) - .lock() - .unwrap_or_else(|poison| poison.into_inner()) - } - - fn clear_env_matrix() { - // Unset every variable the `apply_env_overrides` path reads so - // no test leaks into another. The guard above pins us as the - // sole writer. - unsafe { - // Reason: test-only mutation; protected by env_test_guard. - std::env::remove_var(ENV_MDDS_HOST); - std::env::remove_var(ENV_MDDS_PORT); - std::env::remove_var(ENV_NEXUS_URL); - std::env::remove_var(ENV_FPSS_HOST); - std::env::remove_var(ENV_FPSS_PORT); - std::env::remove_var(ENV_CLIENT_TYPE); - } - } - - #[test] - fn env_overrides_apply_on_production() { - let _guard = env_test_guard(); - clear_env_matrix(); - unsafe { - // Reason: test-only mutation; protected by env_test_guard. 
- std::env::set_var(ENV_MDDS_HOST, "mdds.staging.example.com"); - std::env::set_var(ENV_MDDS_PORT, "8443"); - std::env::set_var(ENV_NEXUS_URL, "https://nexus.staging.example.com/auth"); - std::env::set_var(ENV_CLIENT_TYPE, "rust-thetadatadx-staging"); - std::env::set_var(ENV_FPSS_HOST, "fpss.staging.example.com"); - std::env::set_var(ENV_FPSS_PORT, "21000"); - } - let config = DirectConfig::production(); - assert_eq!(config.mdds_host, "mdds.staging.example.com"); - assert_eq!(config.mdds_port, 8443); - assert_eq!(config.nexus_url, "https://nexus.staging.example.com/auth"); - assert_eq!(config.client_type, "rust-thetadatadx-staging"); - assert_eq!( - config.fpss_hosts[0], - ("fpss.staging.example.com".to_string(), 21000) - ); - clear_env_matrix(); - } - - #[test] - fn builder_takes_precedence_over_env_var() { - let _guard = env_test_guard(); - clear_env_matrix(); - unsafe { - // Reason: test-only mutation; protected by env_test_guard. - std::env::set_var(ENV_CLIENT_TYPE, "env-wins-when-no-builder"); - } - let config = DirectConfig::production().with_client_type("builder-wins"); - assert_eq!(config.client_type, "builder-wins"); - clear_env_matrix(); - } - - #[test] - fn env_overrides_skipped_when_values_malformed() { - let _guard = env_test_guard(); - clear_env_matrix(); - unsafe { - // Reason: test-only mutation; protected by env_test_guard. 
- std::env::set_var(ENV_MDDS_PORT, "not-a-port"); - std::env::set_var(ENV_FPSS_PORT, "0"); // reject zero - std::env::set_var(ENV_MDDS_HOST, " "); // whitespace-only - } - let config = DirectConfig::production(); - let defaults = DirectConfig::production_defaults(); - assert_eq!(config.mdds_host, defaults.mdds_host); - assert_eq!(config.mdds_port, defaults.mdds_port); - assert_eq!(config.fpss_hosts[0].1, defaults.fpss_hosts[0].1); - clear_env_matrix(); - } - - #[test] - fn production_defaults_are_not_sensitive_to_env() { - let _guard = env_test_guard(); - clear_env_matrix(); - unsafe { - // Reason: test-only mutation; protected by env_test_guard. - std::env::set_var(ENV_MDDS_HOST, "ignored-by-defaults"); - std::env::set_var(ENV_MDDS_PORT, "9999"); - } - let config = DirectConfig::production_defaults(); - assert_eq!(config.mdds_host, "mdds-01.thetadata.us"); - assert_eq!(config.mdds_port, 443); - clear_env_matrix(); - } -} diff --git a/crates/thetadatadx/src/config/auth.rs b/crates/thetadatadx/src/config/auth.rs new file mode 100644 index 00000000..3f2df43e --- /dev/null +++ b/crates/thetadatadx/src/config/auth.rs @@ -0,0 +1,37 @@ +//! Auth (Nexus) sub-configuration. + +/// Default Nexus auth URL (matches the upstream production endpoint). +pub const DEFAULT_NEXUS_URL: &str = "https://nexus-api.thetadata.us/identity/terminal/auth_user"; + +/// Default `QueryInfo.client_type`. +pub const DEFAULT_CLIENT_TYPE: &str = "rust-thetadatadx"; + +/// Nexus authentication endpoint + client identifier. +#[derive(Debug, Clone)] +pub struct AuthConfig { + /// Nexus auth URL. Default matches the upstream production endpoint; set + /// [`crate::config::ENV_NEXUS_URL`] to redirect at a staging cluster. + pub nexus_url: String, + + /// Value used for `QueryInfo.client_type`. Defaults to `"rust-thetadatadx"`; + /// override via [`crate::config::ENV_CLIENT_TYPE`] to identify a deployment fleet + /// in server-side dashboards. 
+ pub client_type: String, +} + +impl AuthConfig { + /// Production defaults — upstream Nexus URL + canonical `rust-thetadatadx` client type. + #[must_use] + pub fn production_defaults() -> Self { + Self { + nexus_url: DEFAULT_NEXUS_URL.to_string(), + client_type: DEFAULT_CLIENT_TYPE.to_string(), + } + } +} + +impl Default for AuthConfig { + fn default() -> Self { + Self::production_defaults() + } +} diff --git a/crates/thetadatadx/src/config/env.rs b/crates/thetadatadx/src/config/env.rs new file mode 100644 index 00000000..34980dbc --- /dev/null +++ b/crates/thetadatadx/src/config/env.rs @@ -0,0 +1,103 @@ +//! Environment-variable override layer for [`DirectConfig`]. +//! +//! Two groups: +//! +//! * Compatibility set (`THETADATA_MDDS_HOST`, `THETADATA_MDDS_PORT`, +//! `THETADATA_EMAIL`, `THETADATA_PASSWORD`) — environment variable names +//! operators already use to configure existing `ThetaData` clients; +//! reusing them here means an existing shell config keeps working. +//! * DX extensions — cover surfaces that were previously hardcoded (Nexus +//! URL, FPSS host/port, `client_type`) so site operators can steer +//! traffic at a staging cluster without a code change. +//! +//! Precedence is documented on `DirectConfig`: explicit builder setter > +//! env var > hardcoded default. + +use super::DirectConfig; + +/// MDDS gRPC host. +pub const ENV_MDDS_HOST: &str = "THETADATA_MDDS_HOST"; +/// MDDS gRPC port. +pub const ENV_MDDS_PORT: &str = "THETADATA_MDDS_PORT"; +/// Nexus auth base URL override. +pub const ENV_NEXUS_URL: &str = "THETADATA_NEXUS_URL"; +/// FPSS hostname override. Replaces the primary FPSS host slot; fallback +/// hosts are preserved. +pub const ENV_FPSS_HOST: &str = "THETADATA_FPSS_HOST"; +/// FPSS port override. Pairs with [`ENV_FPSS_HOST`]. +pub const ENV_FPSS_PORT: &str = "THETADATA_FPSS_PORT"; +/// `QueryInfo.client_type` override — steer server-side quotas and +/// dashboards to treat a deployment as a named fleet. 
+pub const ENV_CLIENT_TYPE: &str = "THETADATA_CLIENT_TYPE";
+
+/// Apply the documented [`DirectConfig`] env-var matrix on top of the
+/// receiver. Unknown / malformed values are logged and skipped so a
+/// typo never silently flips production to the wrong endpoint.
+pub(super) fn apply_env_overrides(cfg: &mut DirectConfig) {
+    if let Ok(host) = std::env::var(ENV_MDDS_HOST) {
+        let trimmed = host.trim();
+        if !trimmed.is_empty() {
+            cfg.mdds.host = trimmed.to_string();
+        }
+    }
+    if let Ok(port_str) = std::env::var(ENV_MDDS_PORT) {
+        match port_str.trim().parse::<u16>() {
+            Ok(port) if port > 0 => cfg.mdds.port = port,
+            _ => tracing::warn!(
+                env = ENV_MDDS_PORT,
+                value = %port_str,
+                "ignoring malformed env var; keeping hardcoded default"
+            ),
+        }
+    }
+    if let Ok(url) = std::env::var(ENV_NEXUS_URL) {
+        let trimmed = url.trim();
+        if !trimmed.is_empty() {
+            cfg.auth.nexus_url = trimmed.to_string();
+        }
+    }
+    if let Ok(client_type) = std::env::var(ENV_CLIENT_TYPE) {
+        let trimmed = client_type.trim();
+        if !trimmed.is_empty() {
+            cfg.auth.client_type = trimmed.to_string();
+        }
+    }
+    // FPSS host/port are mirrored as a (host, port) tuple in the
+    // primary slot. If only one of the pair is set we keep the
+    // default for the other half rather than guessing.
+    let env_host = std::env::var(ENV_FPSS_HOST).ok();
+    let env_port = std::env::var(ENV_FPSS_PORT).ok();
+    if env_host.is_some() || env_port.is_some() {
+        if cfg.fpss.hosts.is_empty() {
+            // Empty defaults would mean "no primary to override".
+            // Skip silently — production_defaults seeds 4 hosts, so
+            // this only fires for hand-built configs.
+            tracing::warn!(
+                "ignoring THETADATA_FPSS_HOST / THETADATA_FPSS_PORT; \
+                 DirectConfig has no FPSS hosts to override"
+            );
+        } else {
+            let (default_host, default_port) = cfg.fpss.hosts[0].clone();
+            let host = env_host
+                .as_deref()
+                .map(str::trim)
+                .filter(|s| !s.is_empty())
+                .map_or(default_host, str::to_string);
+            let port = env_port
+                .as_deref()
+                .and_then(|raw| match raw.trim().parse::<u16>() {
+                    Ok(p) if p > 0 => Some(p),
+                    _ => {
+                        tracing::warn!(
+                            env = ENV_FPSS_PORT,
+                            value = %raw,
+                            "ignoring malformed env var; keeping hardcoded default"
+                        );
+                        None
+                    }
+                })
+                .unwrap_or(default_port);
+            cfg.fpss.hosts[0] = (host, port);
+        }
+    }
+}
diff --git a/crates/thetadatadx/src/config/fpss.rs b/crates/thetadatadx/src/config/fpss.rs
new file mode 100644
index 00000000..d6b56fd7
--- /dev/null
+++ b/crates/thetadatadx/src/config/fpss.rs
@@ -0,0 +1,107 @@
+//! FPSS (TCP streaming) sub-configuration.
+
+/// Controls when the FPSS write buffer is flushed.
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+pub enum FpssFlushMode {
+    /// Flush only on PING frames (every 100ms). Matches Java terminal.
+    /// Lower syscall overhead, up to 100ms additional latency.
+    #[default]
+    Batched,
+    /// Flush after every frame write. Lowest latency, higher syscall overhead.
+    Immediate,
+}
+
+/// FPSS streaming client tuning.
+#[derive(Debug, Clone)]
+pub struct FpssConfig {
+    /// FPSS TCP hosts with round-robin failover.
+    ///
+    /// Source: `FPSS_NJ_HOSTS` in `config_0.properties` — the terminal
+    /// iterates through these on connection failure.
+    pub hosts: Vec<(String, u16)>,
+
+    /// FPSS connection/read timeout in milliseconds.
+    ///
+    /// Source: `FPSS_TIMEOUT=10000` in `config_0.properties`.
+    pub timeout_ms: u64,
+
+    /// FPSS event channel buffer depth.
+    /// Caller should pass this to `FpssClient::connect(creds, queue_depth)`.
+    /// Increase if stream events are being dropped under high volume.
+ /// + /// JVM equivalent: `FPSS_QUEUE_DEPTH=1000000` in `config_0.properties`. + /// + /// NOTE: Not automatically wired — caller must pass to `FpssClient::connect()`. + pub queue_depth: usize, + + /// FPSS disruptor ring buffer size (slots, will be rounded up to a power of 2). + /// + /// The LMAX Disruptor ring buffer used for lock-free event dispatch requires + /// a power-of-2 size. This value is rounded up automatically. Larger rings + /// absorb more burst traffic but use more memory (~`ring_size * sizeof(Option)`). + /// + /// Derived from `queue_depth` by default. Override for fine-grained control. + pub ring_size: usize, + + /// FPSS heartbeat ping interval in milliseconds. + /// The protocol requires pings every 100ms; changing this may cause disconnects. + /// + /// Source: `FPSSClient.startPinging()` — timer period = 100ms. + /// + /// NOTE: Not automatically wired — the ping loop uses `protocol::PING_INTERVAL_MS`. + /// Override that constant or pass this value when a configurable ping loop is added. + pub ping_interval_ms: u64, + + /// Per-server TCP connect timeout in milliseconds. + /// + /// Source: `FPSSClient` — `socket.connect(addr, 2000)`. + /// + /// NOTE: Not automatically wired — the connection module uses `protocol::CONNECT_TIMEOUT_MS`. + /// Override that constant or pass this value when a configurable connect is added. + pub connect_timeout_ms: u64, + + /// Controls when the FPSS write buffer is flushed. + /// + /// - [`FpssFlushMode::Batched`] (default): only flush on PING frames (~100ms), + /// matching the Java terminal. Lower syscall overhead. + /// - [`FpssFlushMode::Immediate`]: flush after every frame write. Lowest + /// latency, higher syscall overhead. + pub flush_mode: FpssFlushMode, + + /// Whether to derive OHLCVC bars locally from trade events. + /// + /// When `true` (default), the FPSS client emits derived `FpssData::Ohlcvc` + /// events after each trade. 
When `false`, only server-sent OHLCVC frames + /// (wire code 24) are emitted, reducing per-trade throughput overhead. + /// + /// The Java terminal always derives OHLCVC with no way to disable it. + pub derive_ohlcvc: bool, +} + +impl FpssConfig { + /// Production defaults — extracted from the decompiled Java terminal. + #[must_use] + pub fn production_defaults() -> Self { + Self { + hosts: vec![ + ("nj-a.thetadata.us".to_string(), 20000), + ("nj-a.thetadata.us".to_string(), 20001), + ("nj-b.thetadata.us".to_string(), 20000), + ("nj-b.thetadata.us".to_string(), 20001), + ], + timeout_ms: 10_000, + queue_depth: 1_000_000, + ring_size: 131_072, + ping_interval_ms: 100, + connect_timeout_ms: 2_000, + flush_mode: FpssFlushMode::Batched, + derive_ohlcvc: true, + } + } +} + +impl Default for FpssConfig { + fn default() -> Self { + Self::production_defaults() + } +} diff --git a/crates/thetadatadx/src/config/mdds.rs b/crates/thetadatadx/src/config/mdds.rs new file mode 100644 index 00000000..492dd0d3 --- /dev/null +++ b/crates/thetadatadx/src/config/mdds.rs @@ -0,0 +1,89 @@ +//! MDDS (gRPC) sub-configuration. +//! +//! All values originate from the decompiled Java terminal's +//! `MddsConnectionManager` / `ChannelProvider`; defaults match what the +//! v3 terminal sends in production. + +/// MDDS gRPC client tuning. +#[derive(Debug, Clone)] +pub struct MddsConfig { + /// MDDS gRPC hostname. + /// + /// Source: `MddsConnectionManager` in decompiled terminal (v3 path). + pub host: String, + + /// MDDS gRPC port (443 for TLS in production). + pub port: u16, + + /// Whether to use TLS for the MDDS gRPC connection. + /// Always `true` in production (standard gRPC-over-TLS on port 443). + pub tls: bool, + + /// Max concurrent in-flight gRPC requests. + /// + /// JVM equivalent: `2^subscription_tier` (Free=1, Value=2, Standard=4, Pro=8). + /// Set to 0 to auto-detect from the subscription tier returned by Nexus auth. 
+ pub concurrent_requests: usize, + + /// Max inbound gRPC message size in bytes. + /// + /// JVM equivalent: `maxInboundMessageSize(0x100000 * config.messageSize())`, + /// default 4MB, max 10MB. + pub max_message_size: usize, + + /// gRPC keepalive interval in seconds. + /// + /// Source: `ChannelProvider` — `keepAliveTime(30, SECONDS)`. + pub keepalive_secs: u64, + + /// gRPC keepalive timeout in seconds. + /// + /// Source: `ChannelProvider` — `keepAliveTimeout(10, SECONDS)`. + pub keepalive_timeout_secs: u64, + + /// gRPC flow control: initial stream window size in KB. + /// + /// Maps to `tonic::transport::Endpoint::initial_stream_window_size`. + /// Default 64 KB matches HTTP/2 spec default. + pub window_size_kb: usize, + + /// gRPC flow control: initial connection window size in KB. + /// + /// Maps to `tonic::transport::Endpoint::initial_connection_window_size`. + /// Default 64 KB. Increase for high-throughput bulk queries. + pub connection_window_size_kb: usize, + + /// TCP connect timeout for the MDDS gRPC channel, in seconds. + /// + /// Bounds the time the tonic endpoint will spend establishing a TCP + + /// TLS handshake before failing fast. Default `10s` matches the upper + /// bound observed in the Java terminal's `ChannelProvider` connect + /// path; production deployments behind NAT / VPN can raise this to + /// absorb slow handshakes without altering keepalive cadence. + pub connect_timeout_secs: u64, +} + +impl MddsConfig { + /// Production defaults — extracted from the decompiled Java terminal. 
+    #[must_use]
+    pub fn production_defaults() -> Self {
+        Self {
+            host: "mdds-01.thetadata.us".to_string(),
+            port: 443,
+            tls: true,
+            concurrent_requests: 0,
+            max_message_size: 4 * 1024 * 1024,
+            keepalive_secs: 30,
+            keepalive_timeout_secs: 10,
+            window_size_kb: 64,
+            connection_window_size_kb: 64,
+            connect_timeout_secs: 10,
+        }
+    }
+}
+
+impl Default for MddsConfig {
+    fn default() -> Self {
+        Self::production_defaults()
+    }
+}
diff --git a/crates/thetadatadx/src/config/metrics.rs b/crates/thetadatadx/src/config/metrics.rs
new file mode 100644
index 00000000..9ed7feed
--- /dev/null
+++ b/crates/thetadatadx/src/config/metrics.rs
@@ -0,0 +1,12 @@
+//! Observability sub-configuration (Prometheus exporter binding).
+
+/// Observability binding (Prometheus exporter port).
+#[derive(Debug, Clone, Default)]
+pub struct MetricsConfig {
+    /// Port the Prometheus exporter binds to when the `metrics-prometheus`
+    /// cargo feature is enabled. `None` disables the exporter even when the
+    /// feature is compiled in; `Some(port)` starts an HTTP listener on
+    /// `0.0.0.0:<port>` whose `/metrics` endpoint exposes every counter
+    /// and histogram recorded through the `metrics` crate.
+    pub port: Option<u16>,
+}
diff --git a/crates/thetadatadx/src/config/mod.rs b/crates/thetadatadx/src/config/mod.rs
new file mode 100644
index 00000000..5f800c21
--- /dev/null
+++ b/crates/thetadatadx/src/config/mod.rs
@@ -0,0 +1,1118 @@
+//! Server configuration for direct `ThetaData` access.
+//!
+//! # Server topology (from decompiled Java + `config_0.properties`)
+//!
+//! `ThetaData` runs two server types in their NJ datacenter:
+//!
+//! ## MDDS — Market Data Distribution Server (gRPC, historical data)
+//!
+//! The v1/v2 config listed multiple socket-level hosts:
+//! ```text
+//! MDDS_NJ_HOSTS=nj-a.thetadata.us:12000,nj-a.thetadata.us:12001,
+//!               nj-b.thetadata.us:12000,nj-b.thetadata.us:12001
+//! ```
+//!
+//! But the v3 terminal uses a **single gRPC endpoint** over TLS:
+//! ```text
+//!
mdds-01.thetadata.us:443 +//! ``` +//! +//! Source: `MddsConnectionManager` in decompiled terminal — the v3 code path +//! constructs a gRPC channel to `mdds-01.thetadata.us:443` with TLS, ignoring +//! the multi-host config entirely. +//! +//! ## FPSS — Feed Processing Streaming Server (TCP, real-time streaming) +//! +//! FPSS still uses the multi-host config with round-robin failover: +//! ```text +//! FPSS_NJ_HOSTS=nj-a.thetadata.us:20000,nj-a.thetadata.us:20001, +//! nj-b.thetadata.us:20000,nj-b.thetadata.us:20001 +//! ``` +//! +//! Source: `FpssConnectionManager` in decompiled terminal — iterates through +//! hosts on connection failure. +//! +//! # Layout +//! +//! [`DirectConfig`] is composed of seven nested sub-configs: +//! +//! | Field | Type | +//! |-----------------|-----------------------------------------------------| +//! | `mdds` | [`MddsConfig`] — gRPC host/port/TLS/keepalive | +//! | `fpss` | [`FpssConfig`] — TCP hosts, queue/ring, flush mode | +//! | `reconnect` | [`ReconnectConfig`] — wait cadence + policy | +//! | `retry` | [`RetryPolicy`] — exponential backoff for MDDS gRPC | +//! | `auth` | [`AuthConfig`] — Nexus URL + `client_type` | +//! | `metrics` | [`MetricsConfig`] — Prometheus exporter port | +//! | `runtime` | [`RuntimeConfig`] — tokio worker thread sizing | + +mod auth; +mod env; +mod fpss; +mod mdds; +mod metrics; +mod reconnect; +mod retry; +mod runtime; + +use crate::error::Error; + +pub use auth::{AuthConfig, DEFAULT_CLIENT_TYPE, DEFAULT_NEXUS_URL}; +pub use env::{ + ENV_CLIENT_TYPE, ENV_FPSS_HOST, ENV_FPSS_PORT, ENV_MDDS_HOST, ENV_MDDS_PORT, ENV_NEXUS_URL, +}; +pub use fpss::{FpssConfig, FpssFlushMode}; +pub use mdds::MddsConfig; +pub use metrics::MetricsConfig; +pub use reconnect::{ReconnectConfig, ReconnectPolicy}; +pub use retry::RetryPolicy; +pub use runtime::RuntimeConfig; + +/// Configuration for connecting to `ThetaData` servers directly. +/// +/// Use [`DirectConfig::production()`] for the standard NJ production servers. 
+/// +/// # Layout +/// +/// Fields are grouped into seven nested sub-configs ([`MddsConfig`], +/// [`FpssConfig`], [`ReconnectConfig`], [`RetryPolicy`], [`AuthConfig`], +/// [`MetricsConfig`], [`RuntimeConfig`]). Read accessors on [`DirectConfig`] +/// preserve the field-style naming used by older callers; writes go through +/// the nested struct (e.g. `cfg.fpss.queue_depth = N`). +/// +/// # Environment variable overrides +/// +/// [`DirectConfig::production()`] reads the following environment variables +/// and applies them on top of the hardcoded defaults. Explicit builder +/// setters (`.with_metrics_port(...)` etc.) take precedence over env vars, +/// which in turn take precedence over the hardcoded defaults. +/// +/// | Variable | Type | Effect | +/// |---|---|---| +/// | `THETADATA_MDDS_HOST` | host | overrides `mdds.host` | +/// | `THETADATA_MDDS_PORT` | u16 | overrides `mdds.port` | +/// | `THETADATA_NEXUS_URL` | url | overrides the Nexus auth URL | +/// | `THETADATA_FPSS_HOST` | host | overrides the primary FPSS host | +/// | `THETADATA_FPSS_PORT` | u16 | overrides the primary FPSS port | +/// | `THETADATA_CLIENT_TYPE` | str | overrides `auth.client_type` | +/// | `THETADATA_EMAIL` | str | credential helper ([`crate::auth`]) | +/// | `THETADATA_PASSWORD` | str | credential helper ([`crate::auth`]) | +/// +/// Malformed values (e.g. a non-integer `THETADATA_MDDS_PORT`) are ignored +/// with a `tracing::warn!` — the hardcoded default is retained so a typo +/// in the environment never silently breaks production. +#[derive(Debug, Clone, Default)] +pub struct DirectConfig { + /// MDDS gRPC tuning. + pub mdds: MddsConfig, + /// FPSS streaming tuning. + pub fpss: FpssConfig, + /// Reconnection cadence + policy. + pub reconnect: ReconnectConfig, + /// MDDS retry policy. + pub retry: RetryPolicy, + /// Nexus auth endpoint + client type. + pub auth: AuthConfig, + /// Prometheus exporter binding. + pub metrics: MetricsConfig, + /// Async runtime tuning. 
+ pub runtime: RuntimeConfig, +} + +impl DirectConfig { + /// Default Nexus auth URL (matches the upstream production endpoint). + pub const DEFAULT_NEXUS_URL: &'static str = DEFAULT_NEXUS_URL; + + /// Default `QueryInfo.client_type`. + pub const DEFAULT_CLIENT_TYPE: &'static str = DEFAULT_CLIENT_TYPE; + + /// Production configuration for `ThetaData`'s NJ datacenter. + /// + /// All values extracted from the decompiled Java terminal: + /// - MDDS: `mdds-01.thetadata.us:443` (gRPC over TLS) + /// - FPSS: 4 hosts from `config_0.properties` `FPSS_NJ_HOSTS` + /// - Timeouts: from `config_0.properties` + /// + /// Environment variables listed on [`DirectConfig`] are layered on + /// top of these defaults. + #[must_use] + pub fn production() -> Self { + let mut config = Self::production_defaults(); + env::apply_env_overrides(&mut config); + config.validate() + } + + /// Production defaults without env-var overrides. Tests use this to + /// assert the hardcoded shape in isolation; every caller that wants + /// env-var precedence should reach for [`DirectConfig::production`]. + #[must_use] + pub(crate) fn production_defaults() -> Self { + Self { + mdds: MddsConfig::production_defaults(), + fpss: FpssConfig::production_defaults(), + reconnect: ReconnectConfig::production_defaults(), + retry: RetryPolicy::default(), + auth: AuthConfig::production_defaults(), + metrics: MetricsConfig::default(), + runtime: RuntimeConfig::default(), + } + } + + /// Dev FPSS configuration. + /// + /// Connects to `ThetaData`'s dev FPSS servers (port 20200) which replay + /// a random historical trading day in an infinite loop at maximum speed. + /// Designed for development and testing when markets are closed. + /// + /// MDDS (historical) still uses production servers -- there is no dev MDDS. + /// + /// Source: `config.toml` `fpss_dev_hosts` and + /// + /// + /// Note: dev server replays data at max speed, so queue and ring sizes + /// match production to avoid drops. 
Some contracts may not exist on + /// the replayed day. + #[must_use] + pub fn dev() -> Self { + let mut config = Self::production(); + // Source: config.toml fpss_dev_hosts + config.fpss.hosts = vec![ + ("nj-a.thetadata.us".to_string(), 20200), + ("test-server.thetadata.us".to_string(), 20200), + ("test-server.thetadata.us".to_string(), 20201), + ]; + config.validate() + } + + /// Stage FPSS configuration. + /// + /// Connects to `ThetaData`'s staging FPSS servers (port 20100). + /// Frequent reboots, testing data. Not stable. + /// + /// MDDS (historical) still uses production servers. + /// + /// Source: `config.toml` `fpss_stage_hosts` + #[must_use] + pub fn stage() -> Self { + let mut config = Self::production(); + // Source: config.toml fpss_stage_hosts + config.fpss.hosts = vec![ + ("nj-a.thetadata.us".to_string(), 20100), + ("test-server.thetadata.us".to_string(), 20100), + ("test-server.thetadata.us".to_string(), 20101), + ]; + config.validate() + } + + /// Validate configuration values and clamp out-of-range fields, logging + /// a warning for each clamped value. + /// + /// Called automatically by [`production()`](Self::production), + /// [`dev()`](Self::dev), and [`stage()`](Self::stage). Also useful after + /// loading from a TOML file or modifying fields programmatically. + #[must_use] + pub fn validate(mut self) -> Self { + self.fpss.queue_depth = self.fpss.queue_depth.clamp(16, 1_000_000); + self.mdds.window_size_kb = self.mdds.window_size_kb.clamp(64, 1_024); + self.mdds.connection_window_size_kb = self.mdds.connection_window_size_kb.clamp(64, 1_024); + self + } + + /// Build the MDDS gRPC endpoint URI. + /// + /// Returns a URI suitable for `tonic::transport::Channel::from_static()`. + #[must_use] + pub fn mdds_uri(&self) -> String { + let scheme = if self.mdds.tls { "https" } else { "http" }; + format!("{}://{}:{}", scheme, self.mdds.host, self.mdds.port) + } + + /// Set whether to derive OHLCVC bars locally from trade events. 
+    ///
+    /// When `false`, only server-sent OHLCVC frames are emitted,
+    /// reducing per-trade throughput overhead.
+    #[must_use]
+    pub fn derive_ohlcvc(mut self, enabled: bool) -> Self {
+        self.fpss.derive_ohlcvc = enabled;
+        self
+    }
+
+    /// Set the port the Prometheus exporter should bind to when the
+    /// `metrics-prometheus` cargo feature is enabled. The exporter
+    /// exposes `/metrics` over HTTP on `0.0.0.0:<port>`.
+    #[must_use]
+    pub fn with_metrics_port(mut self, port: u16) -> Self {
+        self.metrics.port = Some(port);
+        self
+    }
+
+    /// Override the retry policy for transient gRPC errors.
+    #[must_use]
+    pub fn with_retry_policy(mut self, policy: RetryPolicy) -> Self {
+        self.retry = policy;
+        self
+    }
+
+    /// Override the Nexus auth URL. Intended for staging deployments —
+    /// production should use [`ENV_NEXUS_URL`] or the default.
+    #[must_use]
+    pub fn with_nexus_url(mut self, url: impl Into<String>) -> Self {
+        self.auth.nexus_url = url.into();
+        self
+    }
+
+    /// Override `QueryInfo.client_type`. Appears in server-side logs
+    /// and dashboards; useful for tagging a deployment fleet.
+    #[must_use]
+    pub fn with_client_type(mut self, client_type: impl Into<String>) -> Self {
+        self.auth.client_type = client_type.into();
+        self
+    }
+
+    /// Parse FPSS hosts from a comma-separated `host:port,host:port,...` string.
+    ///
+    /// This is the format used in `config_0.properties` for `FPSS_NJ_HOSTS`.
+    /// # Errors
+    ///
+    /// Returns an error on network, authentication, or parsing failure.
+    pub fn parse_fpss_hosts(hosts_str: &str) -> Result<Vec<(String, u16)>, Error> {
+        let mut result = Vec::new();
+
+        for entry in hosts_str.split(',') {
+            let entry = entry.trim();
+            if entry.is_empty() {
+                continue;
+            }
+
+            let (host, port_str) = entry
+                .rsplit_once(':')
+                .ok_or_else(|| Error::Config(format!("invalid host:port entry: '{entry}'")))?;
+
+            let port: u16 = port_str
+                .parse()
+                .map_err(|e| Error::Config(format!("invalid port in '{entry}': {e}")))?;
+
+            result.push((host.to_string(), port));
+        }
+
+        if result.is_empty() {
+            return Err(Error::Config("no FPSS hosts provided".to_string()));
+        }
+
+        Ok(result)
+    }
+}
+
+// ── Read accessors (back-compat for the old flat field names) ────────────
+//
+// External callers that still spell config reads as `config.mdds_host(...)`
+// should call these accessor methods. Field-syntax reads (`config.mdds_host`)
+// no longer compile and must migrate to the nested form
+// (`config.mdds.host`); see the commit body for the migration table.
+impl DirectConfig {
+    /// MDDS gRPC hostname.
+    #[must_use]
+    pub fn mdds_host(&self) -> &str {
+        &self.mdds.host
+    }
+    /// MDDS gRPC port.
+    #[must_use]
+    pub fn mdds_port(&self) -> u16 {
+        self.mdds.port
+    }
+    /// Whether MDDS uses TLS.
+    #[must_use]
+    pub fn mdds_tls(&self) -> bool {
+        self.mdds.tls
+    }
+    /// MDDS concurrent in-flight requests budget.
+    #[must_use]
+    pub fn mdds_concurrent_requests(&self) -> usize {
+        self.mdds.concurrent_requests
+    }
+    /// MDDS max inbound message size, in bytes.
+    #[must_use]
+    pub fn mdds_max_message_size(&self) -> usize {
+        self.mdds.max_message_size
+    }
+    /// MDDS keepalive ping interval, in seconds.
+    #[must_use]
+    pub fn mdds_keepalive_secs(&self) -> u64 {
+        self.mdds.keepalive_secs
+    }
+    /// MDDS keepalive ping timeout, in seconds.
+    #[must_use]
+    pub fn mdds_keepalive_timeout_secs(&self) -> u64 {
+        self.mdds.keepalive_timeout_secs
+    }
+    /// MDDS HTTP/2 stream window size, in KB.
+ #[must_use] + pub fn mdds_window_size_kb(&self) -> usize { + self.mdds.window_size_kb + } + /// MDDS HTTP/2 connection window size, in KB. + #[must_use] + pub fn mdds_connection_window_size_kb(&self) -> usize { + self.mdds.connection_window_size_kb + } + /// MDDS TCP connect timeout, in seconds. + #[must_use] + pub fn mdds_connect_timeout_secs(&self) -> u64 { + self.mdds.connect_timeout_secs + } + + /// FPSS host list. + #[must_use] + pub fn fpss_hosts(&self) -> &[(String, u16)] { + &self.fpss.hosts + } + /// FPSS read timeout, in milliseconds. + #[must_use] + pub fn fpss_timeout_ms(&self) -> u64 { + self.fpss.timeout_ms + } + /// FPSS event channel buffer depth. + #[must_use] + pub fn fpss_queue_depth(&self) -> usize { + self.fpss.queue_depth + } + /// FPSS disruptor ring buffer size. + #[must_use] + pub fn fpss_ring_size(&self) -> usize { + self.fpss.ring_size + } + /// FPSS heartbeat ping interval, in milliseconds. + #[must_use] + pub fn fpss_ping_interval_ms(&self) -> u64 { + self.fpss.ping_interval_ms + } + /// FPSS TCP connect timeout, in milliseconds. + #[must_use] + pub fn fpss_connect_timeout_ms(&self) -> u64 { + self.fpss.connect_timeout_ms + } + /// FPSS write-buffer flush mode. + #[must_use] + pub fn fpss_flush_mode(&self) -> FpssFlushMode { + self.fpss.flush_mode + } + /// Whether to derive OHLCVC bars locally from trade events. + #[must_use] + pub fn derive_ohlcvc_enabled(&self) -> bool { + self.fpss.derive_ohlcvc + } + + /// FPSS reconnect wait, in milliseconds. + #[must_use] + pub fn reconnect_wait_ms(&self) -> u64 { + self.reconnect.wait_ms + } + /// FPSS reconnect wait after `TooManyRequests`, in milliseconds. + #[must_use] + pub fn reconnect_wait_rate_limited_ms(&self) -> u64 { + self.reconnect.wait_rate_limited_ms + } + /// FPSS reconnect policy. + #[must_use] + pub fn reconnect_policy(&self) -> &ReconnectPolicy { + &self.reconnect.policy + } + + /// MDDS retry policy. 
+    #[must_use]
+    pub fn retry_policy(&self) -> RetryPolicy {
+        self.retry
+    }
+
+    /// Nexus auth URL.
+    #[must_use]
+    pub fn nexus_url(&self) -> &str {
+        &self.auth.nexus_url
+    }
+    /// `QueryInfo.client_type` value.
+    #[must_use]
+    pub fn client_type(&self) -> &str {
+        &self.auth.client_type
+    }
+
+    /// Prometheus exporter port (`None` disables the exporter).
+    #[must_use]
+    pub fn metrics_port(&self) -> Option<u16> {
+        self.metrics.port
+    }
+
+    /// Tokio worker thread count (`None` = tokio default).
+    #[must_use]
+    pub fn tokio_worker_threads(&self) -> Option<usize> {
+        self.runtime.tokio_worker_threads
+    }
+}
+
+// ── Config file loading (behind `config-file` feature) ──────────────────────
+
+#[cfg(feature = "config-file")]
+mod config_file {
+    use super::{DirectConfig, FpssFlushMode, ReconnectPolicy, RetryPolicy};
+    use crate::error::Error;
+    use serde::Deserialize;
+
+    /// TOML-level representation of the config file.
+    ///
+    /// Unknown keys are silently ignored (`#[serde(default)]` on each section).
+    /// Missing sections fall back to production defaults.
+    #[derive(Debug, Default, Deserialize)]
+    #[serde(default)]
+    struct ConfigFile {
+        mdds: MddsSection,
+        fpss: FpssSection,
+        grpc: GrpcSection,
+        auth: AuthSection,
+    }
+
+    #[derive(Debug, Deserialize)]
+    #[serde(default)]
+    struct MddsSection {
+        host: String,
+        port: u16,
+        tls: bool,
+        keepalive_time_secs: u64,
+        keepalive_timeout_secs: u64,
+        max_message_size: usize,
+    }
+
+    impl Default for MddsSection {
+        fn default() -> Self {
+            let prod = DirectConfig::production();
+            Self {
+                host: prod.mdds.host,
+                port: prod.mdds.port,
+                tls: prod.mdds.tls,
+                keepalive_time_secs: prod.mdds.keepalive_secs,
+                keepalive_timeout_secs: prod.mdds.keepalive_timeout_secs,
+                max_message_size: prod.mdds.max_message_size,
+            }
+        }
+    }
+
+    #[derive(Debug, Deserialize)]
+    #[serde(default)]
+    struct FpssSection {
+        /// Hosts as `["host:port", ...]` array or `"host:port,host:port"` string.
+        hosts: FpssHosts,
+        connect_timeout: u64,
+        read_timeout: u64,
+        ping_interval: u64,
+        reconnect_wait: u64,
+        reconnect_wait_rate_limited: u64,
+        queue_depth: usize,
+        ring_size: usize,
+        flush_mode: String,
+    }
+
+    impl Default for FpssSection {
+        fn default() -> Self {
+            let prod = DirectConfig::production();
+            Self {
+                hosts: FpssHosts::Array(
+                    prod.fpss
+                        .hosts
+                        .iter()
+                        .map(|(h, p)| format!("{h}:{p}"))
+                        .collect(),
+                ),
+                connect_timeout: prod.fpss.connect_timeout_ms,
+                read_timeout: prod.fpss.timeout_ms,
+                ping_interval: prod.fpss.ping_interval_ms,
+                reconnect_wait: prod.reconnect.wait_ms,
+                reconnect_wait_rate_limited: prod.reconnect.wait_rate_limited_ms,
+                queue_depth: prod.fpss.queue_depth,
+                ring_size: prod.fpss.ring_size,
+                flush_mode: "batched".to_string(),
+            }
+        }
+    }
+
+    /// FPSS hosts can be specified as either a TOML array or a comma-separated string.
+    #[derive(Debug, Deserialize)]
+    #[serde(untagged)]
+    enum FpssHosts {
+        Array(Vec<String>),
+        Csv(String),
+    }
+
+    impl Default for FpssHosts {
+        fn default() -> Self {
+            let prod = DirectConfig::production();
+            FpssHosts::Array(
+                prod.fpss
+                    .hosts
+                    .iter()
+                    .map(|(h, p)| format!("{h}:{p}"))
+                    .collect(),
+            )
+        }
+    }
+
+    #[derive(Debug, Deserialize)]
+    #[serde(default)]
+    struct GrpcSection {
+        window_size_kb: usize,
+        connection_window_size_kb: usize,
+        max_message_size_mb: usize,
+        concurrent_requests: usize,
+    }
+
+    impl Default for GrpcSection {
+        fn default() -> Self {
+            let prod = DirectConfig::production();
+            Self {
+                window_size_kb: prod.mdds.window_size_kb,
+                connection_window_size_kb: prod.mdds.connection_window_size_kb,
+                max_message_size_mb: prod.mdds.max_message_size / (1024 * 1024),
+                concurrent_requests: prod.mdds.concurrent_requests,
+            }
+        }
+    }
+
+    #[derive(Debug, Default, Deserialize)]
+    #[serde(default)]
+    struct AuthSection {
+        #[serde(rename = "creds_file")]
+        _creds_file: Option<String>,
+    }
+
+    impl FpssHosts {
+        fn parse(self) -> Result<Vec<(String, u16)>, Error> {
+            let entries = match self {
+
FpssHosts::Array(arr) => arr,
+                FpssHosts::Csv(s) => s.split(',').map(|s| s.trim().to_string()).collect(),
+            };
+            let mut result = Vec::new();
+            for entry in entries {
+                let entry = entry.trim();
+                if entry.is_empty() {
+                    continue;
+                }
+                let (host, port_str) = entry
+                    .rsplit_once(':')
+                    .ok_or_else(|| Error::Config(format!("invalid host:port entry: '{entry}'")))?;
+                let port: u16 = port_str
+                    .parse()
+                    .map_err(|e| Error::Config(format!("invalid port in '{entry}': {e}")))?;
+                result.push((host.to_string(), port));
+            }
+            if result.is_empty() {
+                return Err(Error::Config("no FPSS hosts provided".to_string()));
+            }
+            Ok(result)
+        }
+    }
+
+    impl DirectConfig {
+        /// Load configuration from a TOML file.
+        ///
+        /// The file format matches `config.default.toml` shipped with the crate.
+        /// Missing sections and keys fall back to [`DirectConfig::production()`] defaults.
+        /// Unknown keys are silently ignored.
+        ///
+        /// # Example file
+        ///
+        /// ```toml
+        /// [mdds]
+        /// host = "mdds-01.thetadata.us"
+        /// port = 443
+        /// tls = true
+        ///
+        /// [fpss]
+        /// hosts = ["nj-a.thetadata.us:20000", "nj-b.thetadata.us:20000"]
+        /// reconnect_wait = 2000
+        /// queue_depth = 1_000_000
+        /// flush_mode = "batched" # or "immediate"
+        ///
+        /// [grpc]
+        /// window_size_kb = 64
+        /// connection_window_size_kb = 64
+        /// concurrent_requests = 0 # 0 = auto from tier
+        /// ```
+        /// # Errors
+        ///
+        /// Returns an error on network, authentication, or parsing failure.
+        pub fn from_file(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
+            let contents = std::fs::read_to_string(path.as_ref()).map_err(|e| {
+                Error::Config(format!(
+                    "failed to read config file '{}': {e}",
+                    path.as_ref().display()
+                ))
+            })?;
+            Self::from_toml_str(&contents)
+        }
+
+        /// Parse configuration from a TOML string.
+        ///
+        /// Same semantics as [`from_file`](Self::from_file) but takes a string directly.
+        /// # Errors
+        ///
+        /// Returns an error on network, authentication, or parsing failure.
+        pub fn from_toml_str(toml_str: &str) -> Result<Self, Error> {
+            let cf: ConfigFile = toml::from_str(toml_str)
+                .map_err(|e| Error::Config(format!("failed to parse TOML config: {e}")))?;
+
+            let flush_mode = match cf.fpss.flush_mode.to_lowercase().as_str() {
+                "immediate" => FpssFlushMode::Immediate,
+                _ => FpssFlushMode::Batched,
+            };
+
+            // If [grpc].max_message_size_mb is set, it overrides [mdds].max_message_size.
+            // The grpc section value is in MB; the mdds section value is in bytes.
+            let max_message_size = if cf.grpc.max_message_size_mb
+                != DirectConfig::production().mdds.max_message_size / (1024 * 1024)
+            {
+                cf.grpc.max_message_size_mb * 1024 * 1024
+            } else {
+                cf.mdds.max_message_size
+            };
+
+            let mut out = DirectConfig::production_defaults();
+            out.mdds.host = cf.mdds.host;
+            out.mdds.port = cf.mdds.port;
+            out.mdds.tls = cf.mdds.tls;
+            out.mdds.concurrent_requests = cf.grpc.concurrent_requests;
+            out.mdds.max_message_size = max_message_size;
+            out.mdds.keepalive_secs = cf.mdds.keepalive_time_secs;
+            out.mdds.keepalive_timeout_secs = cf.mdds.keepalive_timeout_secs;
+            out.mdds.window_size_kb = cf.grpc.window_size_kb;
+            out.mdds.connection_window_size_kb = cf.grpc.connection_window_size_kb;
+            // mdds.connect_timeout_secs is not yet TOML-surfaced; keep production default.
+
+            out.fpss.hosts = cf.fpss.hosts.parse()?;
+            out.fpss.timeout_ms = cf.fpss.read_timeout;
+            out.fpss.queue_depth = cf.fpss.queue_depth;
+            out.fpss.ring_size = cf.fpss.ring_size;
+            out.fpss.ping_interval_ms = cf.fpss.ping_interval;
+            out.fpss.connect_timeout_ms = cf.fpss.connect_timeout;
+            out.fpss.flush_mode = flush_mode;
+            // Default: derive OHLCVC from trades (matches production default).
+            // Use the builder API to disable programmatically.
+            out.fpss.derive_ohlcvc = true;
+
+            out.reconnect.wait_ms = cf.fpss.reconnect_wait;
+            out.reconnect.wait_rate_limited_ms = cf.fpss.reconnect_wait_rate_limited;
+            // TOML config cannot express custom closures; default to Auto.
+ // Use the builder API to set Manual or Custom programmatically. + out.reconnect.policy = ReconnectPolicy::Auto; + + // TOML does not surface RetryPolicy / observability fields + // today — the builder API (`with_retry_policy`, + // `with_metrics_port`, env vars) is the opt-in path. + out.retry = RetryPolicy::default(); + out.auth.nexus_url = DirectConfig::DEFAULT_NEXUS_URL.to_string(); + out.auth.client_type = DirectConfig::DEFAULT_CLIENT_TYPE.to_string(); + out.metrics.port = None; + out.runtime.tokio_worker_threads = None; + + Ok(out.validate()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn production_mdds_uri() { + let config = DirectConfig::production(); + assert_eq!(config.mdds_uri(), "https://mdds-01.thetadata.us:443"); + } + + #[test] + fn production_has_four_fpss_hosts() { + let config = DirectConfig::production(); + assert_eq!(config.fpss.hosts.len(), 4); + } + + #[test] + fn production_default_reconnect_policy_is_auto() { + let config = DirectConfig::production(); + assert!(matches!(config.reconnect.policy, ReconnectPolicy::Auto)); + } + + #[test] + fn production_mdds_connect_timeout_default_is_ten_seconds() { + let config = DirectConfig::production(); + assert_eq!(config.mdds.connect_timeout_secs, 10); + } + + #[test] + fn read_accessors_match_nested_fields() { + let config = DirectConfig::production(); + assert_eq!(config.mdds_host(), config.mdds.host.as_str()); + assert_eq!(config.fpss_queue_depth(), config.fpss.queue_depth); + assert_eq!(config.metrics_port(), config.metrics.port); + assert_eq!( + config.tokio_worker_threads(), + config.runtime.tokio_worker_threads + ); + assert_eq!(config.nexus_url(), config.auth.nexus_url.as_str()); + } + + #[test] + fn parse_fpss_hosts_parses_multi_host_csv_with_whitespace_and_empty_entries() { + let hosts = + DirectConfig::parse_fpss_hosts(" nj-a.thetadata.us:20000, ,nj-b.thetadata.us:20001 ") + .unwrap(); + assert_eq!(hosts.len(), 2); + assert_eq!(hosts[0], 
("nj-a.thetadata.us".to_string(), 20000)); + assert_eq!(hosts[1], ("nj-b.thetadata.us".to_string(), 20001)); + } + + #[test] + fn parse_fpss_hosts_rejects_malformed_entries() { + assert!(DirectConfig::parse_fpss_hosts("").is_err()); + assert!(DirectConfig::parse_fpss_hosts("host:notaport").is_err()); + assert!(DirectConfig::parse_fpss_hosts("hostonly").is_err()); + } + + // -- Config file tests (only compiled with the `config-file` feature) -- + + #[cfg(feature = "config-file")] + mod config_file_tests { + use crate::config::{DirectConfig, FpssFlushMode}; + + #[test] + fn empty_toml_gives_production_defaults() { + let config = DirectConfig::from_toml_str("").unwrap(); + let prod = DirectConfig::production(); + assert_eq!(config.mdds.host, prod.mdds.host); + assert_eq!(config.mdds.port, prod.mdds.port); + assert_eq!(config.fpss.hosts.len(), prod.fpss.hosts.len()); + assert_eq!(config.fpss.queue_depth, prod.fpss.queue_depth); + } + + #[test] + fn partial_toml_overrides_only_specified() { + let toml = r#" + [mdds] + host = "custom.example.com" + port = 8443 + + [fpss] + queue_depth = 500000 + "#; + let config = DirectConfig::from_toml_str(toml).unwrap(); + assert_eq!(config.mdds.host, "custom.example.com"); + assert_eq!(config.mdds.port, 8443); + assert_eq!(config.fpss.queue_depth, 500000); + // Unspecified fields keep production defaults + assert!(config.mdds.tls); + } + + #[test] + fn fpss_hosts_as_array() { + let toml = r#" + [fpss] + hosts = ["host-a.example.com:20000", "host-b.example.com:20001"] + "#; + let config = DirectConfig::from_toml_str(toml).unwrap(); + assert_eq!(config.fpss.hosts.len(), 2); + assert_eq!( + config.fpss.hosts[0], + ("host-a.example.com".to_string(), 20000) + ); + assert_eq!( + config.fpss.hosts[1], + ("host-b.example.com".to_string(), 20001) + ); + } + + #[test] + fn fpss_hosts_as_csv_string() { + let toml = r#" + [fpss] + hosts = "host-a.example.com:20000,host-b.example.com:20001" + "#; + let config = 
DirectConfig::from_toml_str(toml).unwrap(); + assert_eq!(config.fpss.hosts.len(), 2); + assert_eq!(config.fpss.hosts[0].0, "host-a.example.com"); + } + + #[test] + fn flush_mode_immediate() { + let toml = r#" + [fpss] + flush_mode = "immediate" + "#; + let config = DirectConfig::from_toml_str(toml).unwrap(); + assert_eq!(config.fpss.flush_mode, FpssFlushMode::Immediate); + } + + #[test] + fn flush_mode_batched_by_default() { + let toml = r#" + [fpss] + flush_mode = "batched" + "#; + let config = DirectConfig::from_toml_str(toml).unwrap(); + assert_eq!(config.fpss.flush_mode, FpssFlushMode::Batched); + } + + #[test] + fn grpc_section_sets_window_sizes() { + let toml = r#" + [grpc] + window_size_kb = 128 + connection_window_size_kb = 256 + concurrent_requests = 4 + "#; + let config = DirectConfig::from_toml_str(toml).unwrap(); + assert_eq!(config.mdds.window_size_kb, 128); + assert_eq!(config.mdds.connection_window_size_kb, 256); + assert_eq!(config.mdds.concurrent_requests, 4); + } + + #[test] + fn grpc_max_message_size_mb_overrides_mdds_bytes() { + let toml = r#" + [grpc] + max_message_size_mb = 8 + "#; + let config = DirectConfig::from_toml_str(toml).unwrap(); + assert_eq!(config.mdds.max_message_size, 8 * 1024 * 1024); + } + + #[test] + fn unknown_keys_are_ignored() { + let toml = r#" + [mdds] + host = "mdds-01.thetadata.us" + port = 443 + unknown_key = "should be ignored" + + [some_unknown_section] + foo = "bar" + "#; + // Should not error + let config = DirectConfig::from_toml_str(toml).unwrap(); + assert_eq!(config.mdds.port, 443); + } + + #[test] + fn full_config_default_toml_parses() { + // Validate that config.default.toml (shipped with the crate) can be parsed. 
+ let default_toml = include_str!("../../../../config.default.toml"); + let config = DirectConfig::from_toml_str(default_toml).unwrap(); + assert_eq!(config.mdds.host, "mdds-01.thetadata.us"); + assert_eq!(config.mdds.port, 443); + assert_eq!(config.fpss.hosts.len(), 4); + } + + #[test] + fn invalid_toml_returns_error() { + let result = DirectConfig::from_toml_str("this is not valid toml [[["); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("TOML")); + } + } + + // -- Validation tests -- + + #[test] + fn validate_clamps_out_of_range_values() { + let mut config = DirectConfig::production_defaults(); + config.fpss.queue_depth = 5; + config.mdds.window_size_kb = 2_048; + let config = config.validate(); + assert_eq!(config.fpss.queue_depth, 16); + assert_eq!(config.mdds.window_size_kb, 1_024); + } + + #[test] + fn validate_preserves_in_range_values() { + let config = DirectConfig::production_defaults(); + let validated = config.validate(); + assert_eq!(validated.fpss.queue_depth, 1_000_000); + assert_eq!(validated.mdds.window_size_kb, 64); + } + + // ── RetryPolicy / env var tests ────────────────────────────────── + + #[test] + fn retry_policy_default_shape_is_stable() { + let p = RetryPolicy::default(); + assert_eq!(p.initial_delay, std::time::Duration::from_millis(250)); + assert_eq!(p.max_delay, std::time::Duration::from_secs(30)); + assert_eq!(p.max_attempts, 5); + assert!(p.jitter); + } + + #[test] + fn retry_policy_capped_backoff_doubles_each_attempt_then_caps() { + use std::time::Duration; + let p = RetryPolicy { + initial_delay: Duration::from_millis(100), + max_delay: Duration::from_millis(800), + max_attempts: 10, + jitter: false, + }; + assert_eq!(p.capped_backoff(0), Duration::ZERO); + assert_eq!(p.capped_backoff(1), Duration::from_millis(100)); + assert_eq!(p.capped_backoff(2), Duration::from_millis(200)); + assert_eq!(p.capped_backoff(3), Duration::from_millis(400)); + assert_eq!(p.capped_backoff(4), 
Duration::from_millis(800)); + // Saturates at max_delay; never exceeds the cap even on absurd attempt counts. + assert_eq!(p.capped_backoff(5), Duration::from_millis(800)); + assert_eq!(p.capped_backoff(60), Duration::from_millis(800)); + } + + #[test] + fn retry_policy_delay_for_attempt_respects_jitter_upper_bound() { + use std::time::Duration; + let p = RetryPolicy { + initial_delay: Duration::from_millis(100), + max_delay: Duration::from_millis(1_000), + max_attempts: 10, + jitter: true, + }; + // Full-jitter envelope: sample ∈ [0, capped_backoff(attempt)]. + // Exercise 200 draws per attempt to shake out off-by-one issues + // without making the test flaky — every sample must land in + // the closed interval above. + for attempt in 1..=6u32 { + let ceiling = p.capped_backoff(attempt); + for _ in 0..200 { + let delay = p.delay_for_attempt(attempt); + assert!( + delay <= ceiling, + "attempt {attempt}: delay {delay:?} exceeded ceiling {ceiling:?}" + ); + } + } + } + + #[test] + fn retry_policy_delay_for_attempt_deterministic_without_jitter() { + use std::time::Duration; + let p = RetryPolicy { + initial_delay: Duration::from_millis(50), + max_delay: Duration::from_millis(400), + max_attempts: 5, + jitter: false, + }; + // No jitter → every draw equals the capped backoff envelope. + for attempt in 1..=4u32 { + let expected = p.capped_backoff(attempt); + for _ in 0..16 { + assert_eq!(p.delay_for_attempt(attempt), expected); + } + } + } + + #[test] + fn retry_policy_disabled_yields_single_attempt() { + use std::time::Duration; + let p = RetryPolicy::disabled(); + assert_eq!(p.max_attempts, 1); + assert_eq!(p.delay_for_attempt(1), Duration::ZERO); + assert!(!p.jitter); + } + + // `std::env` is a process-global singleton; the env-var tests use a + // single mutex so they don't trample each other under + // `cargo test -- --test-threads=N`. Each test keeps hold of the + // guard for the duration of the config build + assertions. 
+    fn env_test_guard() -> std::sync::MutexGuard<'static, ()> {
+        use std::sync::{Mutex, OnceLock};
+        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
+        LOCK.get_or_init(|| Mutex::new(()))
+            .lock()
+            .unwrap_or_else(|poison| poison.into_inner())
+    }
+
+    fn clear_env_matrix() {
+        // Unset every variable the env-override path reads so no test
+        // leaks into another. The guard above pins us as the sole writer.
+        unsafe {
+            // Reason: test-only mutation; protected by env_test_guard.
+            std::env::remove_var(ENV_MDDS_HOST);
+            std::env::remove_var(ENV_MDDS_PORT);
+            std::env::remove_var(ENV_NEXUS_URL);
+            std::env::remove_var(ENV_FPSS_HOST);
+            std::env::remove_var(ENV_FPSS_PORT);
+            std::env::remove_var(ENV_CLIENT_TYPE);
+        }
+    }
+
+    #[test]
+    fn env_overrides_apply_on_production() {
+        let _guard = env_test_guard();
+        clear_env_matrix();
+        unsafe {
+            // Reason: test-only mutation; protected by env_test_guard.
+            std::env::set_var(ENV_MDDS_HOST, "mdds.staging.example.com");
+            std::env::set_var(ENV_MDDS_PORT, "8443");
+            std::env::set_var(ENV_NEXUS_URL, "https://nexus.staging.example.com/auth");
+            std::env::set_var(ENV_CLIENT_TYPE, "rust-thetadatadx-staging");
+            std::env::set_var(ENV_FPSS_HOST, "fpss.staging.example.com");
+            std::env::set_var(ENV_FPSS_PORT, "21000");
+        }
+        let config = DirectConfig::production();
+        assert_eq!(config.mdds.host, "mdds.staging.example.com");
+        assert_eq!(config.mdds.port, 8443);
+        assert_eq!(
+            config.auth.nexus_url,
+            "https://nexus.staging.example.com/auth"
+        );
+        assert_eq!(config.auth.client_type, "rust-thetadatadx-staging");
+        assert_eq!(
+            config.fpss.hosts[0],
+            ("fpss.staging.example.com".to_string(), 21000)
+        );
+        clear_env_matrix();
+    }
+
+    #[test]
+    fn builder_takes_precedence_over_env_var() {
+        let _guard = env_test_guard();
+        clear_env_matrix();
+        unsafe {
+            // Reason: test-only mutation; protected by env_test_guard.
+ std::env::set_var(ENV_CLIENT_TYPE, "env-wins-when-no-builder"); + } + let config = DirectConfig::production().with_client_type("builder-wins"); + assert_eq!(config.auth.client_type, "builder-wins"); + clear_env_matrix(); + } + + #[test] + fn env_overrides_skipped_when_values_malformed() { + let _guard = env_test_guard(); + clear_env_matrix(); + unsafe { + // Reason: test-only mutation; protected by env_test_guard. + std::env::set_var(ENV_MDDS_PORT, "not-a-port"); + std::env::set_var(ENV_FPSS_PORT, "0"); // reject zero + std::env::set_var(ENV_MDDS_HOST, " "); // whitespace-only + } + let config = DirectConfig::production(); + let defaults = DirectConfig::production_defaults(); + assert_eq!(config.mdds.host, defaults.mdds.host); + assert_eq!(config.mdds.port, defaults.mdds.port); + assert_eq!(config.fpss.hosts[0].1, defaults.fpss.hosts[0].1); + clear_env_matrix(); + } + + #[test] + fn production_defaults_are_not_sensitive_to_env() { + let _guard = env_test_guard(); + clear_env_matrix(); + unsafe { + // Reason: test-only mutation; protected by env_test_guard. + std::env::set_var(ENV_MDDS_HOST, "ignored-by-defaults"); + std::env::set_var(ENV_MDDS_PORT, "9999"); + } + let config = DirectConfig::production_defaults(); + assert_eq!(config.mdds.host, "mdds-01.thetadata.us"); + assert_eq!(config.mdds.port, 443); + clear_env_matrix(); + } +} diff --git a/crates/thetadatadx/src/config/reconnect.rs b/crates/thetadatadx/src/config/reconnect.rs new file mode 100644 index 00000000..fa6113f9 --- /dev/null +++ b/crates/thetadatadx/src/config/reconnect.rs @@ -0,0 +1,92 @@ +//! FPSS reconnection sub-configuration. + +use std::sync::Arc; +use std::time::Duration; + +use tdbe::types::enums::RemoveReason; + +/// Controls FPSS reconnection behavior after a disconnect. 
+///
+/// # Default
+///
+/// [`ReconnectPolicy::Auto`] matches the Java terminal's `handleInvoluntaryDisconnect()`:
+/// permanent errors stop immediately, `TooManyRequests` waits 130s, everything else
+/// waits 2s, up to 5 attempts.
+///
+/// # Custom
+///
+/// Supply a closure that receives the disconnect reason and attempt number (1-based)
+/// and returns `Some(delay)` to reconnect after that delay, or `None` to stop.
+#[derive(Clone, Default)]
+pub enum ReconnectPolicy {
+    /// Auto-reconnect matching Java terminal behavior (default).
+    ///
+    /// - Permanent errors (invalid credentials, account issues): no reconnect.
+    /// - `TooManyRequests`: 130s wait.
+    /// - All others: 2s wait.
+    /// - Up to 5 consecutive reconnect attempts before giving up.
+    #[default]
+    Auto,
+    /// No auto-reconnect. User calls `reconnect_streaming()` manually.
+    Manual,
+    /// User-provided function: `(reason, attempt_number) -> Option<Duration>`.
+    ///
+    /// Return `Some(delay)` to reconnect after `delay`, `None` to stop.
+    /// `attempt_number` starts at 1 and increments on each consecutive reconnect.
+    Custom(Arc<dyn Fn(RemoveReason, u32) -> Option<Duration> + Send + Sync>),
+}
+
+impl std::fmt::Debug for ReconnectPolicy {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Auto => write!(f, "Auto"),
+            Self::Manual => write!(f, "Manual"),
+            Self::Custom(_) => write!(f, "Custom(...)"),
+        }
+    }
+}
+
+/// FPSS auto-reconnection cadence + policy.
+#[derive(Debug, Clone)]
+pub struct ReconnectConfig {
+    /// Delay before attempting reconnection after a disconnect, in milliseconds.
+    ///
+    /// Source: `FPSSClient.RECONNECT_DELAY_MS = 2000` in decompiled terminal.
+    /// Note: `config_0.properties` has `RECONNECT_WAIT=1000` but the Java code
+    /// uses the constant `2000` at runtime.
+    ///
+    /// NOTE: Not automatically wired — consumed by
+    /// [`crate::ThetaDataDx::reconnect_streaming`] / the FPSS auto-reconnect path.
+ pub wait_ms: u64, + + /// Delay before reconnecting after a `TooManyRequests` disconnect, in milliseconds. + /// + /// Source: `FPSSClient.handleInvoluntaryDisconnect()` — 130 second wait. + /// + /// NOTE: Not automatically wired — consumed by + /// [`crate::ThetaDataDx::reconnect_streaming`] / the FPSS auto-reconnect path. + pub wait_rate_limited_ms: u64, + + /// Controls FPSS auto-reconnection behavior after involuntary disconnect. + /// + /// Default: [`ReconnectPolicy::Auto`] — matches Java terminal behavior. + pub policy: ReconnectPolicy, +} + +impl ReconnectConfig { + /// Production defaults — matches the Java terminal's reconnect cadence. + #[must_use] + pub fn production_defaults() -> Self { + Self { + wait_ms: 2_000, + wait_rate_limited_ms: 130_000, + policy: ReconnectPolicy::Auto, + } + } +} + +impl Default for ReconnectConfig { + fn default() -> Self { + Self::production_defaults() + } +} diff --git a/crates/thetadatadx/src/config/retry.rs b/crates/thetadatadx/src/config/retry.rs new file mode 100644 index 00000000..88921a36 --- /dev/null +++ b/crates/thetadatadx/src/config/retry.rs @@ -0,0 +1,133 @@ +//! Exponential-backoff retry policy for transient gRPC errors on MDDS. + +use std::time::Duration; + +/// Exponential-backoff retry policy for transient gRPC errors on MDDS. +/// +/// Only wired on status codes `Unavailable`, `DeadlineExceeded`, and +/// `ResourceExhausted`. Permission / credential failures route through +/// the separate auto-refresh path (see the in-crate `MddsClient` wrappers) +/// and are never retried by this policy. +/// +/// # Jitter +/// +/// With `jitter = true` (default) the sleep duration follows AWS's +/// *full jitter* pattern: `delay = rand(0, min(max_delay, initial * +/// 2^attempt))`. Full jitter provably minimises retry-storm contention +/// relative to equal jitter or no jitter; see +/// . +/// +/// With `jitter = false` the delay is the deterministic backoff +/// `min(max_delay, initial * 2^attempt)`. 
Useful for tests that +/// need to assert exact timings. +#[derive(Debug, Clone, Copy)] +pub struct RetryPolicy { + /// Delay used for the first retry (attempt 1). Doubles per attempt. + pub initial_delay: Duration, + /// Upper bound on the computed backoff delay, regardless of attempt. + pub max_delay: Duration, + /// Total attempt budget. `1` disables retry (single call only); + /// `0` still permits the initial call but allows no retries. + pub max_attempts: u32, + /// Apply AWS-style full jitter to each retry delay. + pub jitter: bool, +} + +impl Default for RetryPolicy { + fn default() -> Self { + Self { + initial_delay: Duration::from_millis(250), + max_delay: Duration::from_secs(30), + max_attempts: 5, + jitter: true, + } + } +} + +impl RetryPolicy { + /// Build a policy with retry disabled — single attempt, no backoff. + #[must_use] + pub fn disabled() -> Self { + Self { + initial_delay: Duration::ZERO, + max_delay: Duration::ZERO, + max_attempts: 1, + jitter: false, + } + } + + /// Compute the sleep delay before the next retry. + /// + /// `attempt` is 1-based (attempt 1 = first retry after the initial + /// call failed). The returned duration is: + /// + /// * capped at `max_delay`, + /// * exponentiated as `initial_delay * 2^(attempt - 1)`, + /// * jittered (when `self.jitter`) across `[0, capped_delay]`. + /// + /// Overflow in `initial_delay * 2^(attempt - 1)` saturates at + /// `max_delay` rather than wrapping, so pathological `attempt` + /// values never yield a zero delay. + #[must_use] + pub fn delay_for_attempt(&self, attempt: u32) -> Duration { + let capped = self.capped_backoff(attempt); + if self.jitter { + jitter_sample(capped) + } else { + capped + } + } + + /// Deterministic capped backoff (no jitter). Exposed for tests that + /// need to assert the upper-bound envelope for a given attempt. 
+    #[must_use]
+    pub fn capped_backoff(&self, attempt: u32) -> Duration {
+        if attempt == 0 {
+            return Duration::ZERO;
+        }
+        // `shift = attempt - 1` so attempt 1 = base, attempt 2 = base*2,
+        // attempt 3 = base*4. `base_nanos` is u128 (`Duration::as_nanos`),
+        // where `checked_shl` returns `None` only for shifts >= 128 and
+        // silently discards shifted-out bits otherwise — the `.min(31)`
+        // clamp is the real overflow guard: a Duration is < 2^94 ns, so
+        // the scaled value stays < 2^125 and never wraps.
+        let shift = (attempt - 1).min(31);
+        let base_nanos = self.initial_delay.as_nanos();
+        let scaled_nanos = base_nanos.checked_shl(shift).unwrap_or(u128::MAX);
+        let max_nanos = self.max_delay.as_nanos();
+        let nanos = scaled_nanos.min(max_nanos);
+        // `Duration::from_nanos` takes u64 — clamp rather than truncate.
+        Duration::from_nanos(u64::try_from(nanos).unwrap_or(u64::MAX))
+    }
+}
+
+/// Full-jitter sampler: uniform on `[0, ceiling]`. Uses the `Instant`-
+/// derived nanosecond clock as an entropy source so we do not pull in
+/// a dedicated RNG crate — sufficient for jitter randomisation where
+/// the statistical quality requirement is "any non-pathological spread
+/// across callers", not cryptographic randomness.
+fn jitter_sample(ceiling: Duration) -> Duration {
+    let ceiling_nanos = ceiling.as_nanos();
+    if ceiling_nanos == 0 {
+        return Duration::ZERO;
+    }
+    // `Instant::elapsed` inside a test might return 0 on some CI
+    // schedulers; folding both `elapsed` and a process-local counter
+    // guarantees the sampler advances even then.
+    use std::sync::atomic::{AtomicU64, Ordering};
+    static COUNTER: AtomicU64 = AtomicU64::new(0);
+    let tick = COUNTER.fetch_add(1, Ordering::Relaxed);
+    let now_nanos = u64::try_from(
+        std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .map_or(0, |d| d.as_nanos()),
+    )
+    .unwrap_or(u64::MAX);
+    // Reason: splitmix64 constants — documented mixer, fine for jitter.
+    let mut seed = now_nanos ^ tick.wrapping_mul(0x9E37_79B9_7F4A_7C15);
+    seed ^= seed >> 30;
+    seed = seed.wrapping_mul(0xBF58_476D_1CE4_E5B9);
+    seed ^= seed >> 27;
+    seed = seed.wrapping_mul(0x94D0_49BB_1331_11EB);
+    seed ^= seed >> 31;
+    let ceiling_u128 = ceiling_nanos;
+    let bounded = u128::from(seed) % (ceiling_u128 + 1);
+    Duration::from_nanos(u64::try_from(bounded).unwrap_or(u64::MAX))
+}
diff --git a/crates/thetadatadx/src/config/runtime.rs b/crates/thetadatadx/src/config/runtime.rs
new file mode 100644
index 00000000..27c611da
--- /dev/null
+++ b/crates/thetadatadx/src/config/runtime.rs
@@ -0,0 +1,13 @@
+//! Async runtime sub-configuration (tokio worker thread sizing).
+
+/// Async runtime tuning.
+#[derive(Debug, Clone, Default)]
+pub struct RuntimeConfig {
+    /// Number of tokio worker threads. `None` = tokio default (number of CPU cores).
+    ///
+    /// JVM equivalent: `-Xmx` + `HTTP_CONCURRENCY` thread pool sizing.
+    ///
+    /// NOTE: Not automatically wired — caller should use this when building
+    /// a custom tokio runtime.
+    pub tokio_worker_threads: Option<usize>,
+}
diff --git a/crates/thetadatadx/src/decode.rs b/crates/thetadatadx/src/decode.rs
deleted file mode 100644
index 3da82e35..00000000
--- a/crates/thetadatadx/src/decode.rs
+++ /dev/null
@@ -1,2177 +0,0 @@
-use std::cell::RefCell;
-
-use crate::error::Error;
-use crate::proto;
-use tdbe::types::tick::{
-    CalendarDay, EodTick, GreeksAllTick, GreeksFirstOrderTick, GreeksSecondOrderTick,
-    GreeksThirdOrderTick, InterestRateTick, IvTick, MarketValueTick, OhlcTick, OpenInterestTick,
-    OptionContract, PriceTick, QuoteTick, TradeQuoteTick, TradeTick,
-};
-use thiserror::Error as ThisError;
-
-/// Per-cell decode failure. Produced by the `row_*` helpers when a cell does
-/// not match the column's declared type, or when the requested column index is
-/// past the end of the row. Mirrors the Java terminal's `IllegalArgumentException`
-/// path in `PojoMessageUtils.convert`.
-#[derive(Debug, ThisError, PartialEq, Eq)] -pub enum DecodeError { - /// Cell exists but its `DataType` variant does not match the declared - /// schema for this column. - #[error("column {column}: expected {expected}, got {observed}")] - TypeMismatch { - column: usize, - expected: &'static str, - observed: &'static str, - }, - /// Row has fewer cells than the requested column index. - #[error("column {column}: missing cell")] - MissingCell { column: usize }, - /// A required header (declared in `tick_schema.toml` under - /// `required = [...]`) is absent from a non-empty `DataTable`. Emitted by - /// the generated parsers when the server has added or renamed the column — - /// surfacing this as an error is the only way to prevent silent data loss - /// when the upstream schema drifts (see `HEADER_ALIASES` for known - /// synonyms). Empty `DataTable`s (no rows) still return `Ok(vec![])` - /// because "no trades today" is a legitimate outcome. - #[error( - "required column `{header}` missing from {rows}-row DataTable; \ - available headers: {available}" - )] - MissingRequiredHeader { - header: &'static str, - rows: usize, - available: String, - }, - /// A mid-stream gRPC chunk carries a header set that does not match the - /// header set established by the first chunk. The stream accumulator - /// used to silently retain the first header set and accumulate rows - /// from every chunk underneath it, which would transparently corrupt - /// a row set if the server's wire schema changed mid-response. This - /// variant surfaces the drift instead of hiding it. - #[error( - "chunk {chunk_index} headers drifted from first-chunk schema; \ - first: [{first}]; chunk: [{chunk}]" - )] - ChunkHeaderDrift { - chunk_index: usize, - first: String, - chunk: String, - }, -} - -/// Name the `DataType` variant for error messages. `None` is treated as a -/// missing `data_type` oneof (protobuf cell with no variant set). 
-pub(crate) fn observed_name(dt: Option<&proto::data_value::DataType>) -> &'static str { - match dt { - Some(proto::data_value::DataType::Number(_)) => "Number", - Some(proto::data_value::DataType::Text(_)) => "Text", - Some(proto::data_value::DataType::Price(_)) => "Price", - Some(proto::data_value::DataType::Timestamp(_)) => "Timestamp", - Some(proto::data_value::DataType::NullValue(_)) => "NullValue", - None => "Unset", - } -} - -/// Header aliases: v3 MDDS uses different column names than the tick schema. -/// This maps schema names to their v3 equivalents so parsers work with both. -/// -/// Validated against a real v3 MDDS response capture (see -/// `tests/fixtures/captures/`). Each entry is `(schema_name, server_name)`: -/// `find_header("ms_of_day", h)` returns the index of the first matching -/// server column in `h`. -const HEADER_ALIASES: &[(&str, &str)] = &[ - // Generic time column: MDDS sends a proto `Timestamp`, the tick schema - // models it as an i32 ms-of-day. `row_number` handles the conversion. - ("ms_of_day", "timestamp"), - ("ms_of_day", "created"), - // Combined trade + quote responses split the two time columns into - // `trade_timestamp` (the trade side → `ms_of_day`) and `quote_timestamp` - // (the quote side → `quote_ms_of_day`). Without these aliases the - // `TradeQuoteTick` parser falls through the required-header guard and - // produces an empty Vec on ~1M-row responses (P11). 
- ("ms_of_day", "trade_timestamp"), - ("quote_ms_of_day", "quote_timestamp"), - ("ms_of_day2", "timestamp2"), - ("ms_of_day2", "last_trade"), - ("date", "timestamp"), - ("date", "created"), - ("date", "trade_timestamp"), - // option_list_contracts returns "symbol" where the schema says "root" - ("root", "symbol"), - // v3 uses "implied_vol" where the schema says "implied_volatility" - ("implied_volatility", "implied_vol"), - // The vendor's per-order Greeks endpoints (`option_*_greeks_*_order`) - // and the `_greeks_all` / `_greeks_eod` endpoints publish the - // underlying snapshot timestamp as `underlying_timestamp`. The tick - // schema models it as `underlying_ms_of_day` so the wire conversion - // (Timestamp -> ms-of-day) flows through the standard `row_number` - // path without a per-tick parser branch. - ("underlying_ms_of_day", "underlying_timestamp"), -]; - -/// Helper: find a column index by name, with alias fallback. -/// -/// The v3 MDDS server uses `timestamp` where the tick schema says `ms_of_day`. -/// This function checks the primary name first, then falls back to known aliases. -/// -/// Returns `None` silently when the header is absent — required-header -/// guards in the generated parsers surface a typed -/// [`Error::MissingRequiredHeader`] for the must-have columns; optional -/// columns missing from a subset response (e.g. `option_snapshot_greeks_third_order` -/// returning only the third-order Greek columns from the `GreeksTick` -/// union schema) are by design. Header drift can be observed at the -/// `trace` level via `RUST_LOG=thetadatadx=trace`. -fn find_header(headers: &[&str], name: &str) -> Option { - // Try exact match first. - if let Some(pos) = headers.iter().position(|&s| s == name) { - return Some(pos); - } - // Try aliases. 
- for &(schema_name, server_name) in HEADER_ALIASES { - if name == schema_name { - if let Some(pos) = headers.iter().position(|&s| s == server_name) { - return Some(pos); - } - } - } - tracing::trace!( - header = name, - "column header not present in DataTable (optional or subset response)" - ); - None -} - -/// Eastern Time UTC offset in milliseconds for a given `epoch_ms`. -/// -/// US DST rules changed over time: -/// -/// **2007-onward** (Energy Policy Act of 2005): -/// - EDT (UTC-4): second Sunday of March at 2:00 AM local -> first Sunday of November at 2:00 AM local -/// - EST (UTC-5): rest of the year -/// -/// **Before 2007** (Uniform Time Act of 1966): -/// - EDT (UTC-4): first Sunday of April at 2:00 AM local -> last Sunday of October at 2:00 AM local -/// - EST (UTC-5): rest of the year -/// -/// We compute the transition points in UTC and compare. This avoids -/// external timezone crate dependencies while being correct for all -/// dates with US Eastern Time DST rules. -// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions for valid epoch timestamps. -#[allow( - clippy::cast_possible_wrap, - clippy::cast_sign_loss, - clippy::cast_possible_truncation -)] -fn eastern_offset_ms(epoch_ms: u64) -> i64 { - // First, determine the UTC year/month/day to find DST boundaries. - let epoch_secs = epoch_ms as i64 / 1_000; - let days_since_epoch = epoch_secs / 86_400; - - // Civil date from days since 1970-01-01 (Euclidean algorithm). 
- let z = days_since_epoch + 719_468; - let era = if z >= 0 { z } else { z - 146_096 } / 146_097; - let doe = (z - era * 146_097) as u32; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; - let year = yoe as i32 + (era * 400) as i32; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let month = if mp < 10 { mp + 3 } else { mp - 9 }; - let year = if month <= 2 { year + 1 } else { year }; - - let (dst_start_utc, dst_end_utc) = if year >= 2007 { - // Post-2007: second Sunday of March -> first Sunday of November. - ( - march_second_sunday_utc(year), - november_first_sunday_utc(year), - ) - } else { - // Pre-2007: first Sunday of April -> last Sunday of October. - (april_first_sunday_utc(year), october_last_sunday_utc(year)) - }; - - let epoch_ms_i64 = epoch_ms as i64; - if epoch_ms_i64 >= dst_start_utc && epoch_ms_i64 < dst_end_utc { - -4 * 3_600 * 1_000 // EDT - } else { - -5 * 3_600 * 1_000 // EST - } -} - -/// Epoch ms of the second Sunday of March at 7:00 AM UTC (= 2:00 AM EST). -fn march_second_sunday_utc(year: i32) -> i64 { - // March 1 day-of-week, then find second Sunday. - let mar1 = civil_to_epoch_days(year, 3, 1); - // 1970-01-01 is Thursday. (days + 3) % 7 gives 0=Mon..6=Sun. - let dow = ((mar1 + 3) % 7 + 7) % 7; - let days_to_first_sunday = (6 - dow + 7) % 7; // days from Mar 1 to first Sunday - let second_sunday = mar1 + days_to_first_sunday + 7; // second Sunday - second_sunday * 86_400_000 + 7 * 3_600 * 1_000 // 7:00 AM UTC = 2:00 AM EST -} - -/// Epoch ms of the first Sunday of November at 6:00 AM UTC (= 2:00 AM EDT). 
-fn november_first_sunday_utc(year: i32) -> i64 { - let nov1 = civil_to_epoch_days(year, 11, 1); - let dow = ((nov1 + 3) % 7 + 7) % 7; - let days_to_first_sunday = (6 - dow + 7) % 7; - let first_sunday = nov1 + days_to_first_sunday; - first_sunday * 86_400_000 + 6 * 3_600 * 1_000 // 6:00 AM UTC = 2:00 AM EDT -} - -/// Epoch ms of the first Sunday of April at 7:00 AM UTC (= 2:00 AM EST). -/// -/// Used for pre-2007 DST start (Uniform Time Act of 1966). -fn april_first_sunday_utc(year: i32) -> i64 { - let apr1 = civil_to_epoch_days(year, 4, 1); - let dow = ((apr1 + 3) % 7 + 7) % 7; - let days_to_first_sunday = (6 - dow + 7) % 7; - let first_sunday = apr1 + days_to_first_sunday; - first_sunday * 86_400_000 + 7 * 3_600 * 1_000 // 7:00 AM UTC = 2:00 AM EST -} - -/// Epoch ms of the last Sunday of October at 6:00 AM UTC (= 2:00 AM EDT). -/// -/// Used for pre-2007 DST end (Uniform Time Act of 1966). -fn october_last_sunday_utc(year: i32) -> i64 { - // Start from October 31 and walk back to find the last Sunday. - let oct31 = civil_to_epoch_days(year, 10, 31); - let dow = ((oct31 + 3) % 7 + 7) % 7; // 0=Mon..6=Sun - let days_back = (dow + 1) % 7; // days back from Oct 31 to last Sunday - let last_sunday = oct31 - days_back; - last_sunday * 86_400_000 + 6 * 3_600 * 1_000 // 6:00 AM UTC = 2:00 AM EDT -} - -/// Convert civil date to days since 1970-01-01 (inverse of the Euclidean algorithm). -// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions for valid calendar dates. 
-#[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)] -fn civil_to_epoch_days(year: i32, month: u32, day: u32) -> i64 { - let y = if month <= 2 { - i64::from(year) - 1 - } else { - i64::from(year) - }; - let m = if month <= 2 { - i64::from(month) + 9 - } else { - i64::from(month) - 3 - }; - let era = if y >= 0 { y } else { y - 399 } / 400; - let yoe = (y - era * 400) as u64; - let doy = (153 * m as u64 + 2) / 5 + u64::from(day) - 1; - let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; - era * 146_097 + doe as i64 - 719_468 -} - -/// Convert `epoch_ms` to milliseconds-of-day in Eastern Time (DST-aware). -// Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. -#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] -pub(crate) fn timestamp_to_ms_of_day(epoch_ms: u64) -> i32 { - let offset = eastern_offset_ms(epoch_ms); - let local_ms = epoch_ms as i64 + offset; - (local_ms.rem_euclid(86_400_000)) as i32 -} - -/// Convert `epoch_ms` to YYYYMMDD date integer in Eastern Time (DST-aware). -// Reason: date components fit in i32; epoch_ms is in valid market data range. -#[allow( - clippy::cast_possible_wrap, - clippy::cast_sign_loss, - clippy::cast_possible_truncation -)] -pub(crate) fn timestamp_to_date(epoch_ms: u64) -> i32 { - let offset = eastern_offset_ms(epoch_ms); - let local_secs = (epoch_ms as i64 + offset) / 1_000; - let days = local_secs / 86400 + 719_468; - let era = if days >= 0 { days } else { days - 146_096 } / 146_097; - let doe = (days - era * 146_097) as u32; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; - let y = i64::from(yoe) + era * 400; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let d = doy - (153 * mp + 2) / 5 + 1; - let m = if mp < 10 { mp + 3 } else { mp - 9 }; - let y = if m <= 2 { y + 1 } else { y }; - (y as i32) * 10_000 + (m as i32) * 100 + (d as i32) -} - -/// Extract a date (YYYYMMDD) from a `Number` or `Timestamp` cell, strictly. 
-/// -/// Used by generated parsers when the `date` field maps to a `timestamp` column. -/// `Number` carries the date already in YYYYMMDD form; `Timestamp` is converted -/// to an Eastern-Time YYYYMMDD integer. `NullValue` yields `Ok(None)`; any -/// other type yields `Err(TypeMismatch)`. -/// -/// # Errors -/// -/// Returns [`DecodeError::TypeMismatch`] if the cell is neither a `Number`, -/// `Timestamp`, nor `NullValue` — including the case where the `DataValue` -/// arrived with its `data_type` oneof unset (`observed: "Unset"`), which is a -/// wire-protocol anomaly we fail loud on. Returns [`DecodeError::MissingCell`] -/// only when the row has fewer cells than `idx` (index out of bounds). -// Reason: number values from protobuf fit in i32 for date/integer fields. -#[allow(clippy::cast_possible_truncation)] -pub(crate) fn row_date(row: &proto::DataValueList, idx: usize) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as i32)), - Some(proto::data_value::DataType::Timestamp(ts)) => { - Ok(Some(timestamp_to_date(ts.epoch_ms))) - } - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Number|Timestamp", - observed: observed_name(other), - }), - } -} - -thread_local! { - /// Reusable zstd decompressor **and** output buffer — avoids allocating both - /// a fresh decompressor context and a fresh `Vec` on every call. - /// - /// The decompressor context (~128 KB of zstd internal state) is recycled, and - /// the output buffer retains its capacity across calls so that repeated - /// decompressions of similar-sized payloads hit no allocator at all. - /// - /// We use `decompress_to_buffer` which writes into the pre-existing Vec - /// without reallocating when capacity is sufficient. 
The final `.clone()` - /// is necessary since we return ownership, but the internal buffer capacity - /// persists across calls — the key win is avoiding repeated alloc/dealloc - /// cycles for the working buffer. - static ZSTD_STATE: RefCell<(zstd::bulk::Decompressor<'static>, Vec)> = RefCell::new(( - // Infallible in practice: zstd decompressor creation only fails on OOM. - // thread_local! does not support Result, so unwrap is intentional here. - zstd::bulk::Decompressor::new().expect("zstd decompressor creation failed (possible OOM)"), - Vec::with_capacity(1024 * 1024), // 1 MB initial capacity - )); -} - -/// Decompress a `ResponseData` payload. Returns the raw protobuf bytes of the `DataTable`. -/// -/// # Unknown compression algorithms -/// -/// Prost's `.algo()` silently maps unknown enum values to the default (None=0), -/// so we check the raw i32 to detect truly unknown algorithms. Without this, -/// an unrecognized algorithm would be treated as uncompressed, producing garbage. -/// -/// # Buffer recycling -/// -/// Uses a thread-local `(Decompressor, Vec)` pair. The `Vec` retains its -/// capacity across calls, so repeated decompressions of similar-sized payloads -/// avoid hitting the allocator for the working buffer. The returned `Vec` -/// is a clone (we must return ownership), but the internal slab persists. -/// # Errors -/// -/// Returns [`Error::Decompress`] if the compression algorithm is unknown or -/// zstd decompression fails. -// Reason: original_size is a protobuf u64 that fits in usize for valid payloads. 
-#[allow(clippy::cast_possible_truncation)] -pub fn decompress_response(response: &proto::ResponseData) -> Result, Error> { - let algo_raw = response - .compression_description - .as_ref() - .map_or(0, |cd| cd.algo); - - match proto::CompressionAlgo::try_from(algo_raw) { - Ok(proto::CompressionAlgo::None) => Ok(response.compressed_data.clone()), - Ok(proto::CompressionAlgo::Zstd) => { - let original_size = usize::try_from(response.original_size).unwrap_or(0); - ZSTD_STATE.with(|cell| { - let (ref mut dec, ref mut buf) = *cell.borrow_mut(); - buf.clear(); - buf.resize(original_size, 0); - let n = dec - .decompress_to_buffer(&response.compressed_data, buf) - .map_err(|e| Error::Decompress(e.to_string()))?; - buf.truncate(n); - Ok(buf.clone()) - }) - } - _ => Err(Error::Decompress(format!( - "unknown compression algorithm: {algo_raw}" - ))), - } -} - -/// Decode a `ResponseData` into a `DataTable`. -/// -/// # Errors -/// -/// Returns [`Error::Decompress`] if decompression fails or [`Error::Decode`] -/// if protobuf deserialization fails. -pub fn decode_data_table(response: &proto::ResponseData) -> Result { - let bytes = decompress_response(response)?; - let table: proto::DataTable = - prost::Message::decode(bytes.as_slice()).map_err(|e| Error::Decode(e.to_string()))?; - Ok(table) -} - -/// Extract a column of i64 values from a `DataTable` by header name. -#[must_use] -pub fn extract_number_column(table: &proto::DataTable, header: &str) -> Vec> { - let Some(col_idx) = table.headers.iter().position(|h| h == header) else { - return vec![]; - }; - - table - .data_table - .iter() - .map(|row| { - row.values - .get(col_idx) - .and_then(|dv| dv.data_type.as_ref()) - .and_then(|dt| match dt { - proto::data_value::DataType::Number(n) => Some(*n), - _ => None, - }) - }) - .collect() -} - -/// Extract a column of string values from a `DataTable` by header name. 
-#[must_use] -pub fn extract_text_column(table: &proto::DataTable, header: &str) -> Vec> { - let Some(col_idx) = table.headers.iter().position(|h| h == header) else { - return vec![]; - }; - - table - .data_table - .iter() - .map(|row| { - row.values - .get(col_idx) - .and_then(|dv| dv.data_type.as_ref()) - .and_then(|dt| match dt { - proto::data_value::DataType::Text(s) => Some(s.clone()), - proto::data_value::DataType::Number(n) => Some(n.to_string()), - proto::data_value::DataType::Price(p) => { - Some(format!("{}", tdbe::Price::new(p.value, p.r#type).to_f64())) - } - _ => None, - }) - }) - .collect() -} - -/// Extract a column of Price values from a `DataTable` by header name. -#[must_use] -pub fn extract_price_column(table: &proto::DataTable, header: &str) -> Vec> { - let Some(col_idx) = table.headers.iter().position(|h| h == header) else { - return vec![]; - }; - - table - .data_table - .iter() - .map(|row| { - row.values - .get(col_idx) - .and_then(|dv| dv.data_type.as_ref()) - .and_then(|dt| match dt { - proto::data_value::DataType::Price(p) => { - Some(tdbe::Price::new(p.value, p.r#type)) - } - _ => None, - }) - }) - .collect() -} - -/// Decode an `i32`-valued cell with Java-matching strict semantics. -/// -/// Accepts: -/// - `Number(n)` → `Ok(Some(n as i32))`. -/// - `Timestamp(ts)` → `Ok(Some(ms_of_day))` — v3 MDDS sends time columns as -/// proto `Timestamp`; the parser expects milliseconds-of-day in Eastern Time. -/// - `NullValue` → `Ok(None)`, matching Java `null` return. -/// -/// Any other variant produces [`DecodeError::TypeMismatch`], including the -/// case where the `DataValue` arrived with its `data_type` oneof unset -/// (`observed: "Unset"`) — a wire anomaly we fail loud on. A row shorter than -/// `idx` (index out of bounds) produces [`DecodeError::MissingCell`]. -/// -/// # Errors -/// -/// See variant list above. -// Reason: protocol-defined integer widths from Java FPSS specification. 
-#[allow(clippy::cast_possible_truncation)] -pub(crate) fn row_number( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as i32)), - Some(proto::data_value::DataType::Timestamp(ts)) => { - Ok(Some(timestamp_to_ms_of_day(ts.epoch_ms))) - } - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Number|Timestamp", - observed: observed_name(other), - }), - } -} - -/// Extract raw price value from a `Price` cell (test-only helper). -/// -/// `Price(p)` → `Ok(Some(p.value))`; `NullValue` → `Ok(None)`; other types -/// error. Missing cell errors. -/// -/// # Errors -/// -/// See [`row_number`]. -#[cfg(test)] -#[allow(clippy::cast_possible_truncation)] -fn row_price_value(row: &proto::DataValueList, idx: usize) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Price(p)) => Ok(Some(p.value)), - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Price", - observed: observed_name(other), - }), - } -} - -/// Extract raw price type from a `Price` cell (test-only helper). -/// -/// # Errors -/// -/// See [`row_price_value`]. 
-#[cfg(test)] -#[allow(clippy::cast_possible_truncation)] -fn row_price_type(row: &proto::DataValueList, idx: usize) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Price(p)) => Ok(Some(p.r#type)), - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Price", - observed: observed_name(other), - }), - } -} - -/// Decode a price-valued cell to `f64`, using the cell's own `price_type`. -/// -/// Accepts both `Price` (the schema type) and `Number` — v3 MDDS occasionally -/// sends whole-dollar quantities as plain `Number` cells where the schema -/// would otherwise expect `Price`. `NullValue` returns `Ok(None)`. -/// -/// # Errors -/// -/// Errors on any other cell type or missing cell. -// Reason: protocol-defined integer widths from Java FPSS specification. -#[allow(clippy::cast_possible_truncation)] -pub(crate) fn row_price_f64( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Price(p)) => Ok(Some( - tdbe::types::price::Price::new(p.value, p.r#type).to_f64(), - )), - Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as f64)), - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Price|Number", - observed: observed_name(other), - }), - } -} - -/// Decode a text-valued cell. -/// -/// `Text(s)` → `Ok(Some(s))`, `NullValue` → `Ok(None)`. -/// -/// # Errors -/// -/// Errors on any other cell type or missing cell. 
-pub(crate) fn row_text( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Text(s)) => Ok(Some(s.clone())), - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Text", - observed: observed_name(other), - }), - } -} - -/// Decode an `i64`-valued cell. -/// -/// `Number(n)` → `Ok(Some(n))`; `Price(p)` → scaled with i64-native -/// arithmetic (no f64 hop), so values past `2^53` round-trip bit-exact; -/// `NullValue` → `Ok(None)`. -/// -/// Used by the generated parsers for schema columns typed `i64` — added -/// with the EodTick `volume`/`count` widening (where on high-volume -/// symbols the values exceed `i32::MAX`). -/// -/// `price_type` is clamped to `0..=19` to match -/// [`tdbe::types::price::Price::new`], so the same wire cell decodes -/// identically through this function and [`row_price_f64`]. -/// -/// # Errors -/// -/// Returns `DecodeError::TypeMismatch` for any other cell variant. Returns -/// `DecodeError::MissingCell` for an out-of-bounds column index. Under the -/// clamped `0..=19` price-type contract, scale-up cannot overflow `i64` -/// (max product is `i32::MAX * 10^9 ≈ 2.15e18`, well under `i64::MAX`). -pub(crate) fn row_number_i64( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n)), - Some(proto::data_value::DataType::Price(p)) => { - // Vendor convention: real_value = value * 10^(type - 10). - // Clamp `type` to 0..=19 to match `tdbe::Price::new`, so the - // same wire cell decodes identically through `row_price_f64` - // and `row_number_i64`. 
Positive exp scales up; negative exp - // scales down. v == 0 short-circuits to 0 so a zero price - // never trips the scale-up overflow guard. - let v = i64::from(p.value); - if v == 0 { - return Ok(Some(0)); - } - let price_type = p.r#type.clamp(0, 19); - let exp = price_type - 10; - // After clamping, exp ∈ [-10, 9]. Scale-up: i32::MAX * 10^9 - // ≈ 2.147e18 < i64::MAX (≈ 9.22e18), so checked_mul cannot - // overflow. checked_mul preserves the contract anyway. - let scaled = if exp >= 0 { - 10i64 - .checked_pow(exp.unsigned_abs()) - .and_then(|m| v.checked_mul(m)) - } else { - Some(v / 10i64.pow(exp.unsigned_abs())) - }; - match scaled { - Some(n) => Ok(Some(n)), - None => Err(DecodeError::TypeMismatch { - column: idx, - expected: "i64-fitting Price", - observed: "Price overflowing i64", - }), - } - } - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Number|Price", - observed: observed_name(other), - }), - } -} - -// Generated code -- parser functions from tick_schema.toml by build.rs. -#[allow(clippy::pedantic)] // Reason: auto-generated parser code, not under our control. -mod decode_generated { - use super::*; - include!(concat!(env!("OUT_DIR"), "/decode_generated.rs")); -} -pub use decode_generated::*; - -/// Borrow the cell at `idx`, returning an error if the row is too short. -fn cell_type( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - Ok(dv.data_type.as_ref()) -} - -/// Hand-written parser for `OptionContract` that handles the v3 server's -/// text-formatted fields (expiration as ISO date, right as "PUT"/"CALL"). -/// -/// The `expiration` and `right` columns legitimately arrive as either `Number` -/// or `Text` depending on the upstream version, so the parser dispatches on -/// the cell's own type rather than coalescing silently. 
Mismatched types -/// propagate as [`DecodeError::TypeMismatch`]. -/// -/// # Errors -/// -/// Returns [`DecodeError`] on type mismatch or missing cell. -pub fn parse_option_contracts_v3( - table: &crate::proto::DataTable, -) -> Result, DecodeError> { - let h: Vec<&str> = table - .headers - .iter() - .map(std::string::String::as_str) - .collect(); - - // Same schema-drift guard as the generated parsers: "no contracts today" - // is legitimate, but a rows-present response missing the required `root` - // column is a silent data-loss trap. The wire column is still named - // `root` (or `symbol` via the v3 alias in `decode::HEADER_ALIASES`); the - // `symbol` binding here is the public-API field name documented in the - // v3 vendor migration guide. - let symbol_idx = match find_header(&h, "root") { - Some(i) => i, - None => { - if table.data_table.is_empty() { - return Ok(vec![]); - } - return Err(DecodeError::MissingRequiredHeader { - header: "root", - rows: table.data_table.len(), - available: h.join(","), - }); - } - }; - let exp_idx = find_header(&h, "expiration"); - let strike_idx = find_header(&h, "strike"); - let right_idx = find_header(&h, "right"); - - table - .data_table - .iter() - .map(|row| { - let symbol = row_text(row, symbol_idx)?.unwrap_or_default(); - - // Expiration: `Number` carries YYYYMMDD directly; `Text` carries - // an ISO "2026-04-13" that we parse here. `NullValue` → 0 (legit - // null, coalesce). An unset oneof is a wire anomaly → TypeMismatch. - let expiration = match exp_idx { - Some(i) => match cell_type(row, i)? 
{ - Some(proto::data_value::DataType::Number(n)) => *n as i32, - Some(proto::data_value::DataType::Text(s)) => parse_iso_date(s), - Some(proto::data_value::DataType::NullValue(_)) => 0, - None => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Text", - observed: "Unset", - }); - } - other => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Text", - observed: observed_name(other), - }); - } - }, - None => 0, - }; - - let strike = match strike_idx { - Some(i) => row_price_f64(row, i)?.unwrap_or(0.0), - None => 0.0, - }; - - // Right: `Number` carries the ASCII code directly; `Text` carries - // "PUT"/"CALL"/"P"/"C". `NullValue` / unknown text → 0. An unset - // oneof is a wire anomaly → TypeMismatch. - let right = match right_idx { - Some(i) => match cell_type(row, i)? { - Some(proto::data_value::DataType::Number(n)) => *n as i32, - Some(proto::data_value::DataType::Text(s)) => match s.as_str() { - "CALL" | "C" => 67, // ASCII 'C' - "PUT" | "P" => 80, // ASCII 'P' - _ => 0, - }, - Some(proto::data_value::DataType::NullValue(_)) => 0, - None => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Text", - observed: "Unset", - }); - } - other => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Text", - observed: observed_name(other), - }); - } - }, - None => 0, - }; - - Ok(OptionContract { - symbol, - expiration, - strike, - right, - }) - }) - .collect() -} - -/// Parse an ISO date string "2026-04-13" to YYYYMMDD integer 20260413. -// Reason: date parsing with known-safe integer ranges. 
-#[allow(clippy::cast_possible_truncation, clippy::missing_panics_doc)] -pub(crate) fn parse_iso_date(s: &str) -> i32 { - // Fast path: already numeric (YYYYMMDD) - if let Ok(n) = s.parse::() { - return n; - } - // ISO format: YYYY-MM-DD - let parts: Vec<&str> = s.split('-').collect(); - if parts.len() == 3 { - if let (Ok(y), Ok(m), Ok(d)) = ( - parts[0].parse::(), - parts[1].parse::(), - parts[2].parse::(), - ) { - return y * 10_000 + m * 100 + d; - } - } - 0 -} - -/// Parse a time string "HH:MM:SS" to milliseconds from midnight. -fn parse_time_text(s: &str) -> i32 { - let parts: Vec<&str> = s.split(':').collect(); - if parts.len() == 3 { - if let (Ok(h), Ok(m), Ok(sec)) = ( - parts[0].parse::(), - parts[1].parse::(), - parts[2].parse::(), - ) { - return (h * 3_600 + m * 60 + sec) * 1_000; - } - } - 0 -} - -/// Calendar day status constants. -/// -/// The v3 MDDS server sends a `type` column with text values. We map them to -/// integer constants for the `CalendarDay.status` field: -/// -/// | Server text | Constant | Meaning | -/// |----------------|----------|-----------------------------------| -/// | `"open"` | `0` | Normal trading day | -/// | `"early_close"`| `1` | Early close (e.g. day after Thanksgiving) | -/// | `"full_close"` | `2` | Market closed (holiday) | -/// | `"weekend"` | `3` | Weekend | -/// | (unknown) | `-1` | Unrecognized status text | -pub const CALENDAR_STATUS_OPEN: i32 = 0; -pub const CALENDAR_STATUS_EARLY_CLOSE: i32 = 1; -pub const CALENDAR_STATUS_FULL_CLOSE: i32 = 2; -pub const CALENDAR_STATUS_WEEKEND: i32 = 3; -pub const CALENDAR_STATUS_UNKNOWN: i32 = -1; - -/// Map a v3 calendar `type` text to `(is_open, status)`. 
-fn calendar_type_text(s: &str) -> (i32, i32) { - match s { - "open" => (1, CALENDAR_STATUS_OPEN), - "early_close" => (1, CALENDAR_STATUS_EARLY_CLOSE), - "full_close" => (0, CALENDAR_STATUS_FULL_CLOSE), - "weekend" => (0, CALENDAR_STATUS_WEEKEND), - _ => (0, CALENDAR_STATUS_UNKNOWN), - } -} - -/// Hand-written parser for `CalendarDay` that handles the v3 server's -/// text-formatted fields. -/// -/// The v3 MDDS server sends calendar data with different column names and types -/// than the generated parser expects: -/// -/// | Schema field | Server header | Server type | Mapping | -/// |--------------|---------------|-------------|---------------------------------------| -/// | `date` | `date` | Text | "2025-01-01" -> 20250101 | -/// | `is_open` | `type` | Text | "`open"/"early_close`" -> 1, else -> 0 | -/// | `open_time` | `open` | Text / Null | "09:30:00" -> 34200000 ms | -/// | `close_time` | `close` | Text / Null | "16:00:00" -> 57600000 ms | -/// | `status` | `type` | Text | See [`CALENDAR_STATUS_OPEN`] etc. | -/// -/// Note: `calendar_on_date` and `calendar_open_today` omit the `date` column. -/// Each column dispatches on the cell's own type rather than coalescing -/// silently — mismatched types propagate as [`DecodeError::TypeMismatch`]. -/// -/// # Errors -/// -/// Returns [`DecodeError`] on type mismatch or missing cell. -pub fn parse_calendar_days_v3( - table: &crate::proto::DataTable, -) -> Result, DecodeError> { - let h: Vec<&str> = table - .headers - .iter() - .map(std::string::String::as_str) - .collect(); - - let date_idx = h.iter().position(|&s| s == "date"); - let type_idx = h.iter().position(|&s| s == "type"); - let open_idx = h.iter().position(|&s| s == "open"); - let close_idx = h.iter().position(|&s| s == "close"); - - table - .data_table - .iter() - .map(|row| { - // date: Number carries YYYYMMDD, Timestamp converts to ET date, - // Text "2025-01-01" parses to YYYYMMDD. `NullValue` → 0 (legit - // null). 
Unset oneof is a wire anomaly → TypeMismatch. - let date = match date_idx { - Some(i) => match cell_type(row, i)? { - Some(proto::data_value::DataType::Number(n)) => *n as i32, - Some(proto::data_value::DataType::Timestamp(ts)) => { - timestamp_to_date(ts.epoch_ms) - } - Some(proto::data_value::DataType::Text(s)) => parse_iso_date(s), - Some(proto::data_value::DataType::NullValue(_)) => 0, - None => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Timestamp|Text", - observed: "Unset", - }); - } - other => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Timestamp|Text", - observed: observed_name(other), - }); - } - }, - None => 0, - }; - - // type: Text "open"/"full_close"/"early_close"/"weekend"; Number - // kept as a future-proofing path. `NullValue` → (0, 0). Unset - // oneof is a wire anomaly → TypeMismatch. - let (is_open, status) = match type_idx { - Some(i) => match cell_type(row, i)? { - Some(proto::data_value::DataType::Text(s)) => calendar_type_text(s), - Some(proto::data_value::DataType::Number(n)) => { - let n = *n as i32; - (i32::from(n != 0), n) - } - Some(proto::data_value::DataType::NullValue(_)) => (0, 0), - None => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Text|Number", - observed: "Unset", - }); - } - other => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Text|Number", - observed: observed_name(other), - }); - } - }, - None => (0, 0), - }; - - let open_time = decode_calendar_time(row, open_idx)?; - let close_time = decode_calendar_time(row, close_idx)?; - - Ok(CalendarDay { - date, - is_open, - open_time, - close_time, - status, - }) - }) - .collect() -} - -/// Decode a calendar `open`/`close` column. `Text "HH:MM:SS"` → ms-of-day; -/// `Number` kept as future-proofing. `NullValue` / absent column → 0. An unset -/// oneof is a wire anomaly → [`DecodeError::TypeMismatch`]. 
-fn decode_calendar_time( - row: &proto::DataValueList, - idx: Option, -) -> Result { - let Some(i) = idx else { - return Ok(0); - }; - match cell_type(row, i)? { - Some(proto::data_value::DataType::Text(s)) => Ok(parse_time_text(s)), - Some(proto::data_value::DataType::Number(n)) => Ok(*n as i32), - Some(proto::data_value::DataType::NullValue(_)) => Ok(0), - None => Err(DecodeError::TypeMismatch { - column: i, - expected: "Text|Number", - observed: "Unset", - }), - other => Err(DecodeError::TypeMismatch { - column: i, - expected: "Text|Number", - observed: observed_name(other), - }), - } -} -#[cfg(test)] -mod tests { - use super::*; - - /// Build a DataValue containing a Number. - fn dv_number(n: i64) -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::Number(n)), - } - } - - /// Build a DataValue containing a Price. - fn dv_price(value: i32, r#type: i32) -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::Price(proto::Price { - value, - r#type, - })), - } - } - - /// Build a DataValue containing NullValue. - fn dv_null() -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::NullValue(0)), - } - } - - /// Build a DataValue containing a Timestamp. - fn dv_timestamp(epoch_ms: u64) -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::Timestamp( - proto::ZonedDateTime { epoch_ms, zone: 0 }, - )), - } - } - - /// Build a DataValue with no data_type set (missing). 
- fn dv_missing() -> proto::DataValue { - proto::DataValue { data_type: None } - } - - fn row_of(values: Vec) -> proto::DataValueList { - proto::DataValueList { values } - } - - #[test] - fn row_number_returns_value_for_number_cell() { - let row = row_of(vec![dv_number(42)]); - assert_eq!(row_number(&row, 0).unwrap(), Some(42)); - } - - #[test] - fn row_number_returns_none_for_null_cell() { - let row = row_of(vec![dv_null()]); - assert_eq!(row_number(&row, 0).unwrap(), None); - } - - #[test] - fn row_number_errors_on_unset_cell() { - // A DataValue with the oneof unset is a wire-protocol anomaly. - // Java's `PojoMessageUtils.convert` hits the default arm for - // `DATATYPE_NOT_SET` and throws `IllegalArgumentException`; we - // surface it as `TypeMismatch { observed: "Unset" }`. - let row = row_of(vec![dv_missing()]); - assert_eq!( - row_number(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Number|Timestamp", - observed: "Unset", - }) - ); - } - - #[test] - fn row_number_errors_on_out_of_bounds() { - let row = row_of(vec![]); - assert_eq!( - row_number(&row, 5), - Err(DecodeError::MissingCell { column: 5 }) - ); - } - - #[test] - fn row_number_errors_on_text_cell() { - let row = row_of(vec![dv_text("oops")]); - assert_eq!( - row_number(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Number|Timestamp", - observed: "Text", - }) - ); - } - - #[test] - fn row_number_errors_on_price_cell() { - let row = row_of(vec![dv_price(12345, 10)]); - assert_eq!( - row_number(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Number|Timestamp", - observed: "Price", - }) - ); - } - - #[test] - fn row_number_accepts_timestamp_for_time_columns() { - // v3 MDDS sends `ms_of_day` as a Timestamp. 
- let epoch_ms: u64 = 1_775_050_200_000; // 2026-04-01 09:30 ET - let row = row_of(vec![dv_timestamp(epoch_ms)]); - assert_eq!(row_number(&row, 0).unwrap(), Some(34_200_000)); - } - - #[test] - fn row_text_errors_on_number_cell() { - let row = row_of(vec![dv_number(42)]); - assert_eq!( - row_text(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Text", - observed: "Number", - }) - ); - } - - #[test] - fn row_price_f64_accepts_number_cell() { - // Documented v3 MDDS behavior: f64 fields may arrive as plain Number. - let row = row_of(vec![dv_number(1_500_000)]); - assert_eq!(row_price_f64(&row, 0).unwrap(), Some(1_500_000.0)); - } - - #[test] - fn row_price_value_returns_value_for_price_cell() { - let row = row_of(vec![dv_price(12345, 10)]); - assert_eq!(row_price_value(&row, 0).unwrap(), Some(12345)); - } - - #[test] - fn row_price_value_returns_none_for_null_cell() { - let row = row_of(vec![dv_null()]); - assert_eq!(row_price_value(&row, 0).unwrap(), None); - } - - #[test] - fn row_price_type_returns_type_for_price_cell() { - let row = row_of(vec![dv_price(12345, 10)]); - assert_eq!(row_price_type(&row, 0).unwrap(), Some(10)); - } - - #[test] - fn row_price_type_returns_none_for_null_cell() { - let row = row_of(vec![dv_null()]); - assert_eq!(row_price_type(&row, 0).unwrap(), None); - } - - #[test] - fn null_cells_dont_corrupt_trade_ticks() { - // Build a minimal DataTable with one row that has a NullValue in a field. - // Note: "price" header triggers Price-typed extraction, so we use a Price cell. 
- let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "sequence".into(), - "ext_condition1".into(), - "ext_condition2".into(), - "ext_condition3".into(), - "ext_condition4".into(), - "condition".into(), - "size".into(), - "exchange".into(), - "price".into(), - "condition_flags".into(), - "price_flags".into(), - "volume_type".into(), - "records_back".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34200000), // ms_of_day - dv_number(1), // sequence - dv_null(), // ext_condition1 = NullValue - dv_number(0), // ext_condition2 - dv_number(0), // ext_condition3 - dv_number(0), // ext_condition4 - dv_number(0), // condition - dv_number(100), // size - dv_number(4), // exchange - dv_price(15000, 10), // price (Price-typed because header is "price") - dv_number(0), // condition_flags - dv_number(0), // price_flags - dv_number(0), // volume_type - dv_number(0), // records_back - dv_number(20240301), // date - ])], - }; - - let ticks = parse_trade_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let tick = &ticks[0]; - assert_eq!(tick.ms_of_day, 34200000); - // NullValue should default to 0, not corrupt subsequent fields. - assert_eq!(tick.ext_condition1, 0); - assert_eq!(tick.size, 100); - assert!((tick.price - 15000.0).abs() < 1e-10); - assert_eq!(tick.date, 20240301); - } - - #[test] - fn extract_number_column_returns_none_for_null() { - let table = proto::DataTable { - headers: vec!["val".into()], - data_table: vec![ - row_of(vec![dv_number(10)]), - row_of(vec![dv_null()]), - row_of(vec![dv_number(30)]), - ], - }; - - let col = extract_number_column(&table, "val"); - assert_eq!(col, vec![Some(10), None, Some(30)]); - } - - #[test] - // Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. 
- #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] - fn timestamp_to_ms_of_day_edt() { - // 2026-04-01 09:30:00 ET (EDT, UTC-4) = 2026-04-01 13:30:00 UTC - // epoch_ms for 2026-04-01 13:30:00 UTC - let epoch_ms: u64 = 1_775_050_200_000; // Apr 1 2026, 13:30 UTC - let ms = super::timestamp_to_ms_of_day(epoch_ms); - assert_eq!(ms, 34_200_000, "9:30 AM ET in milliseconds"); - } - - #[test] - // Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. - #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] - fn timestamp_to_ms_of_day_est() { - // 2026-01-15 09:30:00 ET (EST, UTC-5) = 2026-01-15 14:30:00 UTC - let epoch_ms: u64 = 1_768_487_400_000; - let ms = super::timestamp_to_ms_of_day(epoch_ms); - assert_eq!(ms, 34_200_000, "9:30 AM ET in milliseconds (winter)"); - } - - #[test] - fn timestamp_to_date_edt() { - let epoch_ms: u64 = 1_775_050_200_000; // Apr 1 2026, 13:30 UTC - let date = super::timestamp_to_date(epoch_ms); - assert_eq!(date, 20260401); - } - - #[test] - fn timestamp_to_date_est() { - let epoch_ms: u64 = 1_768_487_400_000; // Jan 15 2026, 14:30 UTC - let date = super::timestamp_to_date(epoch_ms); - assert_eq!(date, 20260115); - } - - #[test] - fn dst_transition_march_2026() { - // 2026 DST starts March 8 (second Sunday of March) - // Before: EST (UTC-5) at 06:59 UTC. After: EDT (UTC-4) at 07:01 UTC. - let before: u64 = 1_772_953_140_000; // Mar 8 2026, 06:59 UTC - assert_eq!(super::eastern_offset_ms(before), -5 * 3_600 * 1_000); - let after: u64 = 1_772_953_260_000; // Mar 8 2026, 07:01 UTC - assert_eq!(super::eastern_offset_ms(after), -4 * 3_600 * 1_000); - } - - #[test] - fn pre2007_dst_summer_uses_old_rules() { - // 2006: old rules apply (first Sunday April -> last Sunday October). - // 2006-07-15 18:00:00 UTC = 2006-07-15 14:00:00 EDT (summer, mid-July). - // This is well within DST under both old and new rules, so EDT (UTC-4). 
- let epoch_ms: u64 = 1_153_065_600_000; // Jul 15 2006, 18:00 UTC - assert_eq!( - super::eastern_offset_ms(epoch_ms), - -4 * 3_600 * 1_000, - "mid-July 2006 should be EDT under old DST rules" - ); - } - - #[test] - fn pre2007_est_before_april_dst_start() { - // 2006: old rules — DST starts first Sunday of April (April 2, 2006). - // 2006-02-15 15:00:00 UTC = 2006-02-15 10:00:00 EST (winter, mid-Feb). - // Under old rules, February is EST. - let epoch_ms: u64 = 1_140_015_600_000; // Feb 15 2006, 15:00 UTC - assert_eq!( - super::eastern_offset_ms(epoch_ms), - -5 * 3_600 * 1_000, - "mid-February 2006 should be EST under old DST rules" - ); - } - - /// Build a DataValue containing Text. - fn dv_text(s: &str) -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::Text(s.to_string())), - } - } - - #[test] - fn parse_calendar_v3_holiday() { - // Simulate calendar_year response for a holiday (full_close). - let table = proto::DataTable { - headers: vec!["date".into(), "type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![ - dv_text("2025-01-01"), - dv_text("full_close"), - dv_null(), - dv_null(), - ])], - }; - - let days = parse_calendar_days_v3(&table).unwrap(); - assert_eq!(days.len(), 1); - let d = &days[0]; - assert_eq!(d.date, 20250101); - assert_eq!(d.is_open, 0); - assert_eq!(d.open_time, 0); - assert_eq!(d.close_time, 0); - assert_eq!(d.status, CALENDAR_STATUS_FULL_CLOSE); - } - - #[test] - fn parse_calendar_v3_open_day() { - // Simulate calendar_on_date response for a regular trading day. - // Note: on_date and open_today omit the "date" column. 
- let table = proto::DataTable { - headers: vec!["type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![ - dv_text("open"), - dv_text("09:30:00"), - dv_text("16:00:00"), - ])], - }; - - let days = parse_calendar_days_v3(&table).unwrap(); - assert_eq!(days.len(), 1); - let d = &days[0]; - assert_eq!(d.date, 0); // no date column - assert_eq!(d.is_open, 1); - assert_eq!(d.open_time, 34_200_000); // 9:30 AM = 9*3600+30*60 = 34200 seconds = 34200000 ms - assert_eq!(d.close_time, 57_600_000); // 4:00 PM = 16*3600 = 57600 seconds = 57600000 ms - assert_eq!(d.status, CALENDAR_STATUS_OPEN); - } - - #[test] - fn parse_calendar_v3_early_close() { - // Simulate an early close day (day after Thanksgiving). - let table = proto::DataTable { - headers: vec!["date".into(), "type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![ - dv_text("2025-11-28"), - dv_text("early_close"), - dv_text("09:30:00"), - dv_text("13:00:00"), - ])], - }; - - let days = parse_calendar_days_v3(&table).unwrap(); - assert_eq!(days.len(), 1); - let d = &days[0]; - assert_eq!(d.date, 20251128); - assert_eq!(d.is_open, 1); - assert_eq!(d.open_time, 34_200_000); - assert_eq!(d.close_time, 46_800_000); // 1:00 PM = 13*3600 = 46800 seconds = 46800000 ms - assert_eq!(d.status, CALENDAR_STATUS_EARLY_CLOSE); - } - - #[test] - fn parse_calendar_v3_weekend() { - let table = proto::DataTable { - headers: vec!["type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![dv_text("weekend"), dv_null(), dv_null()])], - }; - - let days = parse_calendar_days_v3(&table).unwrap(); - assert_eq!(days.len(), 1); - let d = &days[0]; - assert_eq!(d.is_open, 0); - assert_eq!(d.status, CALENDAR_STATUS_WEEKEND); - } - - #[test] - fn parse_time_text_valid() { - assert_eq!(super::parse_time_text("09:30:00"), 34_200_000); - assert_eq!(super::parse_time_text("16:00:00"), 57_600_000); - assert_eq!(super::parse_time_text("13:00:00"), 46_800_000); - 
assert_eq!(super::parse_time_text("00:00:00"), 0); - } - - #[test] - fn parse_time_text_invalid_returns_zero() { - assert_eq!(super::parse_time_text("invalid"), 0); - assert_eq!(super::parse_time_text(""), 0); - } - - #[test] - fn parse_eod_timestamp_aliases_decode_time_and_date_separately() { - // 2026-04-01 13:30:00 UTC = 2026-04-01 09:30:00 ET (EDT). - let epoch_ms: u64 = 1_775_050_200_000; - let table = proto::DataTable { - headers: vec![ - "timestamp".into(), - "timestamp2".into(), - "open".into(), - "close".into(), - ], - data_table: vec![row_of(vec![ - dv_timestamp(epoch_ms), - dv_timestamp(epoch_ms), - dv_number(15000), - dv_number(15100), - ])], - }; - - let ticks = parse_eod_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - assert_eq!(ticks[0].ms_of_day, 34_200_000); - assert_eq!(ticks[0].ms_of_day2, 34_200_000); - assert_eq!(ticks[0].date, 20260401); - assert!((ticks[0].open - 15000.0).abs() < 1e-10); - assert!((ticks[0].close - 15100.0).abs() < 1e-10); - } - - #[test] - fn row_number_i64_decodes_price_cells() { - // MDDS sends large integer fields as Price cells, not Number cells. - // Price encoding: price_type centered at 10. 
- // type=10 → value as-is, type=13 → value * 10^3, type=7 → value / 10^3 - // Example: Price { value: 3842, type: 19 } = 3842 * 10^9 = 3_842_000_000_000 - let row = row_of(vec![dv_price(3842, 19)]); - assert_eq!( - row_number_i64(&row, 0).unwrap(), - Some(3_842_000_000_000_i64) - ); - } - - #[test] - fn row_number_i64_still_decodes_number_cells() { - let row = row_of(vec![dv_number(999_999_999)]); - assert_eq!(row_number_i64(&row, 0).unwrap(), Some(999_999_999)); - } - - #[test] - fn row_number_i64_returns_none_for_null() { - let row = row_of(vec![dv_null()]); - assert_eq!(row_number_i64(&row, 0).unwrap(), None); - } - - #[test] - fn row_number_i64_errors_on_text_cell() { - let row = row_of(vec![dv_text("oops")]); - assert_eq!( - row_number_i64(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Number|Price", - observed: "Text", - }) - ); - } - - /// Pin a Price cell past `2^53` to the i64-native result for `type=17`. - #[test] - fn row_number_i64_price_cell_returns_bit_exact_i64() { - let row = row_of(vec![dv_price(1_073_741_823, 17)]); - let got = row_number_i64(&row, 0).unwrap().expect("Some"); - assert_eq!(got, 10_737_418_230_000_000_i64); - assert!(got > (1_i64 << 53)); - } - - /// `value == 0` decodes to 0 regardless of the exponent. Mathematically - /// the product is zero; the decoder must not reject a zero cell, even - /// when `price_type` is at the clamp boundary. - #[test] - fn row_number_i64_price_zero_value_short_circuits() { - let row = row_of(vec![dv_price(0, 19)]); - assert_eq!(row_number_i64(&row, 0), Ok(Some(0))); - } - - /// `row_number_i64` and `row_price_f64` must agree on the same wire - /// cell. With `type=19` (in-range) and `value=42`, `row_price_f64` - /// routes through `Price::new` which keeps `price_type=19`, and - /// `row_number_i64` produces the i64-native scale. Both should match. - /// Manual: 42 * 10^(19-10) = 42 * 10^9 = 42_000_000_000. 
- #[test] - fn row_number_i64_matches_row_price_f64_at_type_19() { - let row = row_of(vec![dv_price(42, 19)]); - let as_int = row_number_i64(&row, 0).unwrap().expect("Some"); - let as_float = row_price_f64(&row, 0).unwrap().expect("Some"); - assert_eq!(as_int, 42_000_000_000_i64); - assert!((as_float - 42_000_000_000.0_f64).abs() < 1.0); - } - - /// `price_type=20` is out-of-range; both decoders must clamp to 19 - /// (matching `Price::new`). A `type=20` cell and a `type=19` cell with - /// the same value must therefore decode to the same i64. - #[test] - fn row_number_i64_clamps_price_type_above_19() { - let row_clamped = row_of(vec![dv_price(7, 20)]); - let row_in_range = row_of(vec![dv_price(7, 19)]); - assert_eq!( - row_number_i64(&row_clamped, 0).unwrap(), - row_number_i64(&row_in_range, 0).unwrap(), - ); - // Pin the absolute value too: 7 * 10^9 = 7_000_000_000. - assert_eq!( - row_number_i64(&row_clamped, 0).unwrap(), - Some(7_000_000_000_i64) - ); - } - - /// Maximum scale-up under the clamped contract: `value=i32::MAX, - /// type=19` yields `i32::MAX * 10^9 = 2_147_483_647_000_000_000`, - /// which is below `i64::MAX = 9_223_372_036_854_775_807`. The product - /// must fit and decode bit-exact (no `TypeMismatch`). - #[test] - fn row_number_i64_max_in_range_price_fits_i64() { - let row = row_of(vec![dv_price(i32::MAX, 19)]); - assert_eq!( - row_number_i64(&row, 0).unwrap(), - Some(2_147_483_647_000_000_000_i64), - ); - } - - #[test] - fn parse_trade_ticks_propagates_type_mismatch() { - // A Text cell in an i32 column is a schema violation — the parser - // must surface it, not silently coerce to 0. - let table = proto::DataTable { - headers: vec!["ms_of_day".into(), "price".into()], - data_table: vec![row_of(vec![dv_text("not-a-number"), dv_price(15000, 10)])], - }; - let err = parse_trade_ticks(&table).unwrap_err(); - assert!( - matches!(err, DecodeError::TypeMismatch { .. 
}), - "expected TypeMismatch, got {err:?}" - ); - } - - // ─────────── Unset-oneof is an error at every strict decode site ─────────── - // - // A `DataValue` with its `data_type` oneof unset is a wire-protocol - // anomaly (Java's `PojoMessageUtils.convert` default arm throws - // `IllegalArgumentException`). The helpers `row_number` / `row_date` / - // etc. already surface it as `TypeMismatch { observed: "Unset" }`. These - // tests pin the same behaviour on the call-sites that used to coalesce - // `NullValue | None` to zero: `parse_option_contracts_v3`, - // `parse_calendar_days_v3`, the generator-emitted EOD helpers, and the - // generator-emitted contract-id injected `expiration` / `right` fields. - - #[test] - fn parse_option_contracts_v3_errors_on_unset_expiration() { - let table = proto::DataTable { - headers: vec!["root".into(), "expiration".into()], - data_table: vec![row_of(vec![dv_text("AAPL"), dv_missing()])], - }; - assert_eq!( - parse_option_contracts_v3(&table).unwrap_err(), - DecodeError::TypeMismatch { - column: 1, - expected: "Number|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_option_contracts_v3_errors_on_unset_right() { - let table = proto::DataTable { - headers: vec!["root".into(), "right".into()], - data_table: vec![row_of(vec![dv_text("AAPL"), dv_missing()])], - }; - assert_eq!( - parse_option_contracts_v3(&table).unwrap_err(), - DecodeError::TypeMismatch { - column: 1, - expected: "Number|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_calendar_days_v3_errors_on_unset_date() { - let table = proto::DataTable { - headers: vec!["date".into(), "type".into()], - data_table: vec![row_of(vec![dv_missing(), dv_text("open")])], - }; - assert_eq!( - parse_calendar_days_v3(&table).unwrap_err(), - DecodeError::TypeMismatch { - column: 0, - expected: "Number|Timestamp|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_calendar_days_v3_errors_on_unset_open_time() { - // `decode_calendar_time` is the 
helper covering both `open` and - // `close`; one test pins the shared path. - let table = proto::DataTable { - headers: vec!["type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![ - dv_text("open"), - dv_missing(), - dv_text("16:00:00"), - ])], - }; - assert_eq!( - parse_calendar_days_v3(&table).unwrap_err(), - DecodeError::TypeMismatch { - column: 1, - expected: "Text|Number", - observed: "Unset", - } - ); - } - - #[test] - fn parse_eod_ticks_errors_on_unset_cell() { - // `parse_eod_ticks` is generator-emitted with the `eod_num` / - // `eod_date` / `eod_price` helpers; one test pins the shared path. - let table = proto::DataTable { - headers: vec!["timestamp".into(), "open".into()], - data_table: vec![row_of(vec![dv_missing(), dv_number(15000)])], - }; - let err = parse_eod_ticks(&table).unwrap_err(); - assert_eq!( - err, - DecodeError::TypeMismatch { - column: 0, - expected: "Number|Price|Timestamp", - observed: "Unset", - } - ); - } - - #[test] - fn parse_trade_ticks_errors_on_unset_injected_expiration() { - // `parse_trade_ticks` is generator-emitted with `contract_id = true`; - // an `expiration` header in the server payload triggers the injected - // `expiration` / `strike` / `right` decode. An unset cell there used - // to coalesce to 0; now it must fail loud. 
- let table = proto::DataTable { - headers: vec!["ms_of_day".into(), "price".into(), "expiration".into()], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15000, 10), - dv_missing(), - ])], - }; - let err = parse_trade_ticks(&table).unwrap_err(); - assert_eq!( - err, - DecodeError::TypeMismatch { - column: 2, - expected: "Number|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_trade_ticks_errors_on_unset_injected_right() { - let table = proto::DataTable { - headers: vec!["ms_of_day".into(), "price".into(), "right".into()], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15000, 10), - dv_missing(), - ])], - }; - let err = parse_trade_ticks(&table).unwrap_err(); - assert_eq!( - err, - DecodeError::TypeMismatch { - column: 2, - expected: "Number|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_greeks_all_ticks_decodes_price_encoded_greeks() { - // Regression: an earlier strict decode rejected Price cells for Greek - // columns, but the v3 MDDS server sends Greeks as Price-encoded - // values (mirroring Java's `dataValue2Object` -> BigDecimal path). - // Live run #24520486541 on main surfaced this as - // "column 13: expected Number, got Price" - // on `option_snapshot_greeks_first_order::bulk_chain` and peers. - // Pin Price-cell decoding for both IV and a Greek so a future - // strict-Number tightening can't re-break it silently. - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_volatility".into(), - "delta".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - // IV = 0.1234 encoded with price_type = 6 (value * 10^-4). - dv_price(1234, 6), - // Delta = 0.5 encoded with price_type = 9 (value * 10^-1). 
- dv_price(5, 9), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - assert!((ticks[0].implied_volatility - 0.1234).abs() < 1e-10); - assert!((ticks[0].delta - 0.5).abs() < 1e-10); - } - - /// Pin the `implied_vol → implied_volatility` and `underlying_timestamp - /// → underlying_ms_of_day` aliases in `HEADER_ALIASES` (decode.rs:82) by - /// decoding a wire payload whose headers use ONLY the v3 server-side - /// names. If either alias entry is dropped or mistyped, the matching - /// schema field silently zero-defaults via `opt_float` / `opt_number` - /// (see the generated `parse_greeks_all_ticks` body), and this test - /// catches that regression. - /// - /// The companion fixture-driven test - /// `crates/thetadatadx/tests/test_decode_captures.rs::greeks_all_*` - /// can't catch a broken `implied_vol` alias on its own because the - /// captured fixture's `first_row_implied_volatility` is `0.0` — a - /// missing alias and a real zero IV are indistinguishable there. - #[test] - fn parse_greeks_all_ticks_resolves_implied_vol_and_underlying_timestamp_aliases() { - // Headers use the v3 server-side names. Schema names - // (`implied_volatility`, `underlying_ms_of_day`) are deliberately - // absent so the parser MUST resolve them via `HEADER_ALIASES`. - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_vol".into(), - "underlying_timestamp".into(), - ], - // IV = 0.42 encoded with price_type = 6 (value * 10^-4). - // underlying_timestamp epoch_ms 1_775_050_200_000 corresponds - // to 2026-04-01 09:30 ET, which `row_number` converts to - // ms-of-day 34_200_000 (matching `first_row_underlying_ms_of_day` - // in the option_history_greeks_all fixture meta). 
- data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(4200, 6), - dv_timestamp(1_775_050_200_000), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - // Non-zero IV proves the `implied_vol` alias resolved; a broken - // alias would produce 0.0 from the `opt_float(None)` arm. - assert!( - (t.implied_volatility - 0.42).abs() < 1e-9, - "implied_vol alias did not resolve: got {}", - t.implied_volatility, - ); - // Non-zero ms-of-day proves the `underlying_timestamp` alias - // resolved; a broken alias would produce 0 from `opt_number(None)`. - assert_eq!(t.underlying_ms_of_day, 34_200_000); - } - - #[test] - fn parse_greeks_all_ticks_still_decodes_number_cells() { - // Companion to the Price-cell regression test: Number cells must - // still decode, matching Java's dispatch-on-wire-type semantics. - let table = proto::DataTable { - headers: vec!["ms_of_day".into(), "implied_volatility".into()], - data_table: vec![row_of(vec![dv_number(34_200_000), dv_number(0)])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - assert!(ticks[0].implied_volatility.abs() < 1e-10); - } - - /// Vendor wire shape for `option_*_greeks_first_order`: only the seven - /// first-order columns plus IV pair — vanna/charm/vomma/veta/speed/ - /// zomma/color/ultima/d1/d2/dual_delta/dual_gamma/vera are absent and - /// must default to `0.0` without surfacing any `find_header` warn. - /// Column layout pinned to `scripts/upstream_openapi.yaml` schema - /// `items_option_snapshot_greeks_first_order`. 
- #[test] - fn parse_greeks_all_ticks_decodes_first_order_subset_with_silent_gaps() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_volatility".into(), - "delta".into(), - "theta".into(), - "vega".into(), - "rho".into(), - "epsilon".into(), - "lambda".into(), - "iv_error".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(5023, 6), // delta = 0.5023 - dv_price(-114, 6), // theta = -0.0114 - dv_price(8741, 6), // vega = 0.8741 - dv_price(13598, 6), // rho = 1.3598 - dv_price(-1976, 6), // epsilon = -0.1976 - dv_price(32052, 6), // lambda = 3.2052 - dv_price(-3, 6), // iv_error = -3 / 10^4 = -0.0003 - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - // Wire-present columns: bit-exact against the input. - // `dv_price(value, 6)` decodes as `value * 10^(6-10) = value / 10000` - // (see `tdbe::types::price::Price::to_f64`). - assert_eq!(t.ms_of_day, 34_200_000); - assert!((t.implied_volatility - 0.2142).abs() < 1e-9); - assert!((t.delta - 0.5023).abs() < 1e-9); - assert!((t.theta - -0.0114).abs() < 1e-9); - assert!((t.vega - 0.8741).abs() < 1e-9); - assert!((t.rho - 1.3598).abs() < 1e-9); - assert!((t.epsilon - -0.1976).abs() < 1e-9); - assert!((t.lambda - 3.2052).abs() < 1e-9); - assert!((t.iv_error - -0.0003).abs() < 1e-9); - assert_eq!(t.date, 20_240_614); - - // Wire-absent columns: zero-defaulted. These are the columns the - // server does NOT publish for `_greeks_first_order` — `find_header` - // returning `None` for each must NOT yield an error and must NOT - // warn (the pre-fix behaviour spammed eight warn lines per row). 
- assert_eq!(t.gamma, 0.0); - assert_eq!(t.vanna, 0.0); - assert_eq!(t.charm, 0.0); - assert_eq!(t.vomma, 0.0); - assert_eq!(t.veta, 0.0); - assert_eq!(t.speed, 0.0); - assert_eq!(t.zomma, 0.0); - assert_eq!(t.color, 0.0); - assert_eq!(t.ultima, 0.0); - assert_eq!(t.d1, 0.0); - assert_eq!(t.d2, 0.0); - assert_eq!(t.dual_delta, 0.0); - assert_eq!(t.dual_gamma, 0.0); - assert_eq!(t.vera, 0.0); - } - - /// Vendor wire shape for `option_*_greeks_second_order`: gamma / vanna - /// / charm / vomma / veta plus IV pair. Column layout pinned to - /// upstream OpenAPI schema `items_option_snapshot_greeks_second_order`. - #[test] - fn parse_greeks_all_ticks_decodes_second_order_subset_with_silent_gaps() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_volatility".into(), - "gamma".into(), - "vanna".into(), - "charm".into(), - "vomma".into(), - "veta".into(), - "iv_error".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(120, 6), // gamma = 0.012 - dv_price(45, 6), // vanna = 0.0045 - dv_price(-12, 6), // charm = -0.0012 - dv_price(900, 6), // vomma = 0.09 - dv_price(-3, 6), // veta = -0.0003 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert!((t.gamma - 0.012).abs() < 1e-9); - assert!((t.vanna - 0.0045).abs() < 1e-9); - assert!((t.charm - -0.0012).abs() < 1e-9); - assert!((t.vomma - 0.09).abs() < 1e-9); - assert!((t.veta - -0.0003).abs() < 1e-9); - - // First-order, third-order, and `_all`-only columns are absent - // on the wire and default to 0.0. - assert_eq!(t.delta, 0.0); - assert_eq!(t.speed, 0.0); - assert_eq!(t.zomma, 0.0); - assert_eq!(t.d1, 0.0); - assert_eq!(t.vera, 0.0); - } - - /// Vendor wire shape for `option_*_greeks_third_order`: speed / zomma / - /// color / ultima plus IV pair. 
This is the exact endpoint the Issue - /// #472 reporter was hitting — `option_snapshot_greeks_third_order` - /// previously emitted eight warn lines per row for the absent - /// first-order / second-order / `_all`-only columns. The test pins the - /// silent-gap behaviour so a future regression of `find_header` back - /// to `tracing::warn!` would surface here as a behavioural change. - /// Column layout pinned to upstream OpenAPI schema - /// `items_option_snapshot_greeks_third_order` (notably `vera` is NOT - /// in the third-order subset; it only ships in `_greeks_all`). - #[test] - fn parse_greeks_all_ticks_decodes_third_order_subset_with_silent_gaps() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_volatility".into(), - "speed".into(), - "zomma".into(), - "color".into(), - "ultima".into(), - "iv_error".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(7, 6), // speed = 0.0007 - dv_price(15, 6), // zomma = 0.0015 - dv_price(-2, 6), // color = -0.0002 - dv_price(33, 6), // ultima = 0.0033 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert!((t.speed - 0.0007).abs() < 1e-9); - assert!((t.zomma - 0.0015).abs() < 1e-9); - assert!((t.color - -0.0002).abs() < 1e-9); - assert!((t.ultima - 0.0033).abs() < 1e-9); - - // Vera is NOT a third-order column on the wire even though the - // generic `GreeksTick` struct carries the field. It must default - // to 0.0 here without warning. - assert_eq!(t.vera, 0.0); - // First-order and second-order columns also absent. 
- assert_eq!(t.delta, 0.0); - assert_eq!(t.gamma, 0.0); - assert_eq!(t.vanna, 0.0); - assert_eq!(t.d1, 0.0); - assert_eq!(t.dual_gamma, 0.0); - } - - /// `parse_greeks_first_order_ticks` against the column subset the - /// vendor publishes for `option_*_greeks_first_order` -- pinned to - /// `items_option_snapshot_greeks_first_order` in the upstream OpenAPI. - /// Asserts every column the parser fills decodes to the exact value - /// from the input row, and that the underlying-snapshot pair is - /// populated (the column subset is what differs from `_greeks_all`, - /// not the underlying tail). - #[test] - fn parse_greeks_first_order_ticks_decodes_first_order_subset() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "bid".into(), - "ask".into(), - "delta".into(), - "theta".into(), - "vega".into(), - "rho".into(), - "epsilon".into(), - "lambda".into(), - "implied_volatility".into(), - "iv_error".into(), - "underlying_ms_of_day".into(), - "underlying_price".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15022, 6), // bid = 1.5022 - dv_price(15041, 6), // ask = 1.5041 - dv_price(5023, 6), // delta = 0.5023 - dv_price(-114, 6), // theta = -0.0114 - dv_price(8741, 6), // vega = 0.8741 - dv_price(13598, 6), // rho = 1.3598 - dv_price(-1976, 6), // epsilon = -0.1976 - dv_price(32052, 6), // lambda = 3.2052 - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(34_200_001), - dv_price(580025, 6), // underlying_price = 58.0025 - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_first_order_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert_eq!(t.ms_of_day, 34_200_000); - assert!((t.bid - 1.5022).abs() < 1e-9); - assert!((t.ask - 1.5041).abs() < 1e-9); - assert!((t.delta - 0.5023).abs() < 1e-9); - assert!((t.theta - -0.0114).abs() < 1e-9); - assert!((t.vega - 0.8741).abs() < 1e-9); - assert!((t.rho - 
1.3598).abs() < 1e-9); - assert!((t.epsilon - -0.1976).abs() < 1e-9); - assert!((t.lambda - 3.2052).abs() < 1e-9); - assert!((t.implied_volatility - 0.2142).abs() < 1e-9); - assert!((t.iv_error - -0.0003).abs() < 1e-9); - assert_eq!(t.underlying_ms_of_day, 34_200_001); - assert!((t.underlying_price - 58.0025).abs() < 1e-9); - assert_eq!(t.date, 20_240_614); - } - - /// `parse_greeks_second_order_ticks` against the column subset the - /// vendor publishes for `option_*_greeks_second_order` -- pinned to - /// `items_option_snapshot_greeks_second_order` in the upstream - /// OpenAPI. Second-order Greeks: gamma / vanna / charm / vomma / - /// veta plus the IV pair and the bid/ask quote pair. - #[test] - fn parse_greeks_second_order_ticks_decodes_second_order_subset() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "bid".into(), - "ask".into(), - "gamma".into(), - "vanna".into(), - "charm".into(), - "vomma".into(), - "veta".into(), - "implied_volatility".into(), - "iv_error".into(), - "underlying_ms_of_day".into(), - "underlying_price".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15022, 6), // bid = 1.5022 - dv_price(15041, 6), // ask = 1.5041 - dv_price(120, 6), // gamma = 0.012 - dv_price(45, 6), // vanna = 0.0045 - dv_price(-12, 6), // charm = -0.0012 - dv_price(900, 6), // vomma = 0.09 - dv_price(-3, 6), // veta = -0.0003 - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(34_200_001), - dv_price(580025, 6), - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_second_order_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert_eq!(t.ms_of_day, 34_200_000); - assert!((t.bid - 1.5022).abs() < 1e-9); - assert!((t.ask - 1.5041).abs() < 1e-9); - assert!((t.gamma - 0.012).abs() < 1e-9); - assert!((t.vanna - 0.0045).abs() < 1e-9); - assert!((t.charm - -0.0012).abs() < 1e-9); - assert!((t.vomma - 
0.09).abs() < 1e-9); - assert!((t.veta - -0.0003).abs() < 1e-9); - assert!((t.implied_volatility - 0.2142).abs() < 1e-9); - assert!((t.iv_error - -0.0003).abs() < 1e-9); - assert_eq!(t.underlying_ms_of_day, 34_200_001); - assert!((t.underlying_price - 58.0025).abs() < 1e-9); - assert_eq!(t.date, 20_240_614); - } - - /// `parse_greeks_third_order_ticks` against the column subset the - /// vendor publishes for `option_*_greeks_third_order` -- pinned to - /// `items_option_snapshot_greeks_third_order` in the upstream - /// OpenAPI. Third-order Greeks: speed / zomma / color / ultima plus - /// the IV pair and the bid/ask quote pair. Notably the wire schema - /// does NOT publish `vera`; the struct does not carry it either. - #[test] - fn parse_greeks_third_order_ticks_decodes_third_order_subset() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "bid".into(), - "ask".into(), - "speed".into(), - "zomma".into(), - "color".into(), - "ultima".into(), - "implied_volatility".into(), - "iv_error".into(), - "underlying_ms_of_day".into(), - "underlying_price".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15022, 6), // bid = 1.5022 - dv_price(15041, 6), // ask = 1.5041 - dv_price(7, 6), // speed = 0.0007 - dv_price(15, 6), // zomma = 0.0015 - dv_price(-2, 6), // color = -0.0002 - dv_price(33, 6), // ultima = 0.0033 - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(34_200_001), - dv_price(580025, 6), - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_third_order_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert_eq!(t.ms_of_day, 34_200_000); - assert!((t.bid - 1.5022).abs() < 1e-9); - assert!((t.ask - 1.5041).abs() < 1e-9); - assert!((t.speed - 0.0007).abs() < 1e-9); - assert!((t.zomma - 0.0015).abs() < 1e-9); - assert!((t.color - -0.0002).abs() < 1e-9); - assert!((t.ultima - 0.0033).abs() < 1e-9); - 
assert!((t.implied_volatility - 0.2142).abs() < 1e-9); - assert!((t.iv_error - -0.0003).abs() < 1e-9); - assert_eq!(t.underlying_ms_of_day, 34_200_001); - assert!((t.underlying_price - 58.0025).abs() < 1e-9); - assert_eq!(t.date, 20_240_614); - } -} diff --git a/crates/thetadatadx/src/fpss/connection.rs b/crates/thetadatadx/src/fpss/connection.rs index f45682ca..8852a91d 100644 --- a/crates/thetadatadx/src/fpss/connection.rs +++ b/crates/thetadatadx/src/fpss/connection.rs @@ -193,21 +193,21 @@ mod tests { #[test] fn production_config_has_four_fpss_hosts() { let config = crate::config::DirectConfig::production(); - assert_eq!(config.fpss_hosts.len(), 4); + assert_eq!(config.fpss.hosts.len(), 4); assert_eq!( - config.fpss_hosts[0], + config.fpss.hosts[0], ("nj-a.thetadata.us".to_string(), 20000) ); assert_eq!( - config.fpss_hosts[1], + config.fpss.hosts[1], ("nj-a.thetadata.us".to_string(), 20001) ); assert_eq!( - config.fpss_hosts[2], + config.fpss.hosts[2], ("nj-b.thetadata.us".to_string(), 20000) ); assert_eq!( - config.fpss_hosts[3], + config.fpss.hosts[3], ("nj-b.thetadata.us".to_string(), 20001) ); } diff --git a/crates/thetadatadx/src/fpss/mod.rs b/crates/thetadatadx/src/fpss/mod.rs index e6f94d21..7113caa9 100644 --- a/crates/thetadatadx/src/fpss/mod.rs +++ b/crates/thetadatadx/src/fpss/mod.rs @@ -28,7 +28,7 @@ //! # use thetadatadx::auth::Credentials; //! # fn example() -> Result<(), thetadatadx::error::Error> { //! let creds = Credentials::new("user@example.com", "pw"); -//! let hosts = thetadatadx::config::DirectConfig::production().fpss_hosts; +//! let hosts = thetadatadx::config::DirectConfig::production().fpss.hosts; //! let client = FpssClient::connect(&creds, &hosts, 4096, Default::default(), Default::default(), true, |event: &FpssEvent| { //! // Runs on the Disruptor consumer thread -- keep it fast. //! // Push to your own queue for heavy processing. 
@@ -192,7 +192,7 @@ impl FpssClient { /// Source: `FPSSClient.connect()` and `FPSSClient.sendCredentials()`. /// Connect to FPSS streaming servers. /// - /// `hosts` is the FPSS server list from [`crate::config::DirectConfig::fpss_hosts`]. + /// `hosts` is the FPSS server list from [`crate::config::FpssConfig::hosts`]. /// Servers are tried in order until one connects. /// /// `policy` controls auto-reconnect behavior after involuntary disconnect. diff --git a/crates/thetadatadx/src/fpss/protocol.rs b/crates/thetadatadx/src/fpss/protocol/contract.rs similarity index 72% rename from crates/thetadatadx/src/fpss/protocol.rs rename to crates/thetadatadx/src/fpss/protocol/contract.rs index ef640151..33a928af 100644 --- a/crates/thetadatadx/src/fpss/protocol.rs +++ b/crates/thetadatadx/src/fpss/protocol/contract.rs @@ -1,11 +1,4 @@ -//! FPSS message types, contract serialization, and subscription protocol. -//! -//! # Wire protocol (from decompiled Java) -//! -//! ## Message codes (`StreamMsgType` in Java) -//! -//! Source: `StreamMsgType.java` — enum with byte codes for each message direction. -//! See [`tdbe::types::enums::StreamMsgType`] for the Rust enum. +//! Contract identifier, OCC-21 parser, and wire serialization codec. //! //! ## Contract serialization (`Contract.java`) //! @@ -16,62 +9,11 @@ //! [exp_date: i32 BE] [is_call: u8] [strike: i32 BE]` //! //! Source: `Contract.toBytes()` and `Contract.fromBytes()` in decompiled terminal. -//! -//! ## Authentication (`FPSSClient.java`) -//! -//! CREDENTIALS message (code 0) payload: -//! ```text -//! [0x00] [username_len: u16 BE] [username bytes] [password bytes] -//! ``` -//! -//! Source: `FPSSClient.sendCredentials()` in decompiled terminal. -//! -//! ## Subscription (`FPSSClient.java`, `PacketStream.java`) -//! -//! Subscribe payload: `[req_id: i32 BE] [contract bytes]` -//! Full-type subscribe: `[req_id: i32 BE] [sec_type: u8]` (5 bytes, subscribes all of that type) -//! 
Unsubscribe payload: same format as subscribe, using REMOVE_* codes. -//! -//! Response (code 40): `[req_id: i32 BE] [resp_code: i32 BE]` -//! - 0 = OK, 1 = ERROR, 2 = `MAX_STREAMS`, 3 = `INVALID_PERMS` -//! -//! Source: `PacketStream.addQuote()`, `PacketStream.removeQuote()`, -//! `FPSSClient.onReqResponse()` in decompiled terminal. -use tdbe::types::enums::{RemoveReason, SecType, StreamMsgType, StreamResponseType}; +use tdbe::types::enums::SecType; use crate::error::Error; -/// Maximum payload size for a single FPSS frame (1-byte length field). -/// -/// Source: `PacketStream.java` — `LEN` field is a single unsigned byte. -pub const MAX_PAYLOAD: usize = 255; - -/// Ping interval in milliseconds. -/// -/// Source: `FPSSClient.java` — heartbeat thread sends PING every 100ms after login. -pub const PING_INTERVAL_MS: u64 = 100; - -/// Reconnect delay in milliseconds after `IOException`. -/// -/// Source: `FPSSClient.java` — `RECONNECT_DELAY_MS = 2000`. -pub const RECONNECT_DELAY_MS: u64 = 2_000; - -/// Delay before reconnecting after `TOO_MANY_REQUESTS` disconnect (milliseconds). -/// -/// Source: `FPSSClient.java` — waits 130 seconds on `RemoveReason.TOO_MANY_REQUESTS`. -pub const TOO_MANY_REQUESTS_DELAY_MS: u64 = 130_000; - -/// Socket connect timeout in milliseconds. -/// -/// Source: `FPSSClient.java` — `socket.connect(addr, 2000)`. -pub const CONNECT_TIMEOUT_MS: u64 = 2_000; - -/// Socket read timeout in milliseconds. -/// -/// Source: `FPSSClient.java` — `socket.setSoTimeout(10000)`. 
-pub const READ_TIMEOUT_MS: u64 = 10_000; - // --------------------------------------------------------------------------- // Contract // --------------------------------------------------------------------------- @@ -724,267 +666,6 @@ impl std::fmt::Display for ContractParseError { impl std::error::Error for ContractParseError {} -// --------------------------------------------------------------------------- -// Credentials payload -// --------------------------------------------------------------------------- - -/// Build the CREDENTIALS (code 0) message payload. -/// -/// # Wire format (from `FPSSClient.sendCredentials()`) -/// -/// ```text -/// [0x00] [username_len: u16 BE] [username bytes] [password bytes] -/// ``` -/// -/// The leading 0x00 byte is a version/flag byte present in the Java source. -/// `username_len` is the byte-length of the username (email), as a big-endian u16. -/// Password bytes follow immediately with no length prefix — the server infers -/// password length from `payload_len - 3 - username_len`. -#[must_use] -pub fn build_credentials_payload(username: &str, password: &str) -> Vec { - let user_bytes = username.as_bytes(); - let pass_bytes = password.as_bytes(); - - // Match Java's `putShort((byte)len)` behavior: the length is first narrowed - // to a byte (i8), then sign-extended to a short (i16). For lengths 0-127 - // this is identical to a plain u16 cast. For lengths 128-255 the sign - // extension sets the high byte to 0xFF. In practice usernames are always - // <128 bytes, but we match the exact wire encoding for correctness. - // Truncation to i8 is intentional: matches Java putShort((byte)len) wire encoding. 
- #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - let user_len = i16::from(user_bytes.len() as i8); - - // 1 (version) + 2 (user_len) + user + pass - let mut buf = Vec::with_capacity(3 + user_bytes.len() + pass_bytes.len()); - buf.push(0x00); // version/flag byte - buf.extend_from_slice(&user_len.to_be_bytes()); - buf.extend_from_slice(user_bytes); - buf.extend_from_slice(pass_bytes); - buf -} - -// --------------------------------------------------------------------------- -// Subscription payloads -// --------------------------------------------------------------------------- - -/// Build a subscription payload for a specific contract. -/// -/// # Wire format (from `PacketStream.addQuote()` / `PacketStream.addTrade()`) -/// -/// ```text -/// [req_id: i32 BE] [contract bytes] -/// ``` -/// -/// The message code (21=QUOTE, 22=TRADE, `23=OPEN_INTEREST`) is set by the caller -/// in the frame header; this function only builds the payload. -/// -/// # Errors -/// -/// Returns [`Error::Config`] if the contract root is empty or longer -/// than 16 bytes, surfacing the Java-parity invariant from -/// [`Contract::try_to_bytes`]. -pub fn build_subscribe_payload(req_id: i32, contract: &Contract) -> Result, Error> { - let contract_bytes = contract.try_to_bytes()?; - let mut buf = Vec::with_capacity(4 + contract_bytes.len()); - buf.extend_from_slice(&req_id.to_be_bytes()); - buf.extend_from_slice(&contract_bytes); - Ok(buf) -} - -/// Build a full-type subscription payload (subscribe to all contracts of a security type). -/// -/// # Wire format (from `PacketStream.java`) -/// -/// ```text -/// [req_id: i32 BE] [sec_type: u8] -/// ``` -/// -/// Total 5 bytes. The server uses the 5-byte length to distinguish this from -/// a per-contract subscription (which is always longer). 
-#[must_use] -pub fn build_full_type_subscribe_payload(req_id: i32, sec_type: SecType) -> Vec { - let mut buf = Vec::with_capacity(5); - buf.extend_from_slice(&req_id.to_be_bytes()); - buf.push(sec_type as u8); - buf -} - -/// Build the PING (code 10) payload. -/// -/// Source: `FPSSClient.java` — heartbeat sends 1-byte zero payload every 100ms. -#[must_use] -pub fn build_ping_payload() -> Vec { - vec![0x00] -} - -/// Build the STOP (code 32) payload sent by the client on shutdown. -/// -/// Source: `FPSSClient.java` — `sendStop()` sends empty-ish STOP message. -#[must_use] -pub fn build_stop_payload() -> Vec { - vec![0x00] -} - -// --------------------------------------------------------------------------- -// Response parsing -// --------------------------------------------------------------------------- - -/// Parse a `REQ_RESPONSE` (code 40) payload. -/// -/// # Wire format (from `FPSSClient.onReqResponse()`) -/// -/// ```text -/// [req_id: i32 BE] [resp_code: i32 BE] -/// ``` -/// -/// Returns `(req_id, response_type)`. -/// # Errors -/// -/// Returns an error on network, authentication, or parsing failure. 
-pub fn parse_req_response( - payload: &[u8], -) -> Result<(i32, StreamResponseType), crate::error::Error> { - if payload.len() < 8 { - return Err(crate::error::Error::Fpss { - kind: crate::error::FpssErrorKind::ProtocolError, - message: format!( - "REQ_RESPONSE payload too short: {} bytes, expected 8", - payload.len() - ), - }); - } - - let req_id = i32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]); - let resp_code = i32::from_be_bytes([payload[4], payload[5], payload[6], payload[7]]); - - let resp_type = match resp_code { - 0 => StreamResponseType::Subscribed, - 1 => StreamResponseType::Error, - 2 => StreamResponseType::MaxStreamsReached, - 3 => StreamResponseType::InvalidPerms, - _ => { - return Err(crate::error::Error::Fpss { - kind: crate::error::FpssErrorKind::ProtocolError, - message: format!("unknown REQ_RESPONSE code: {resp_code}"), - }); - } - }; - - Ok((req_id, resp_type)) -} - -/// Parse a DISCONNECTED (code 12) payload. -/// -/// # Wire format (from `FPSSClient.java`) -/// -/// ```text -/// [reason: i16 BE] -/// ``` -/// -/// 2-byte big-endian `RemoveReason` code. 
-#[must_use] -pub fn parse_disconnect_reason(payload: &[u8]) -> RemoveReason { - if payload.len() < 2 { - return RemoveReason::Unspecified; - } - let code = i16::from_be_bytes([payload[0], payload[1]]); - match code { - 0 => RemoveReason::InvalidCredentials, - 1 => RemoveReason::InvalidLoginValues, - 2 => RemoveReason::InvalidLoginSize, - 3 => RemoveReason::GeneralValidationError, - 4 => RemoveReason::TimedOut, - 5 => RemoveReason::ClientForcedDisconnect, - 6 => RemoveReason::AccountAlreadyConnected, - 7 => RemoveReason::SessionTokenExpired, - 8 => RemoveReason::InvalidSessionToken, - 9 => RemoveReason::FreeAccount, - 12 => RemoveReason::TooManyRequests, - 13 => RemoveReason::NoStartDate, - 14 => RemoveReason::LoginTimedOut, - 15 => RemoveReason::ServerRestarting, - 16 => RemoveReason::SessionTokenNotFound, - 17 => RemoveReason::ServerUserDoesNotExist, - 18 => RemoveReason::InvalidCredentialsNullUser, - _ => RemoveReason::Unspecified, - } -} - -/// Parse a CONTRACT (code 20) payload. -/// -/// # Wire format (from `FPSSClient.onContract()`) -/// -/// ```text -/// [contract_id: i32 BE] [contract bytes...] -/// ``` -/// -/// The server assigns a numeric `contract_id` used to identify this contract -/// in subsequent QUOTE/TRADE/OHLCVC data messages. The contract bytes use the -/// same serialization as `Contract::to_bytes()`. -/// -/// Returns `(server_assigned_id, contract)`. -/// # Errors -/// -/// Returns an error on network, authentication, or parsing failure. 
-pub fn parse_contract_message(payload: &[u8]) -> Result<(i32, Contract), crate::error::Error> { - if payload.len() < 5 { - return Err(crate::error::Error::Fpss { - kind: crate::error::FpssErrorKind::ProtocolError, - message: format!("CONTRACT payload too short: {} bytes", payload.len()), - }); - } - - let contract_id = i32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]); - let (contract, _consumed) = - Contract::from_bytes(&payload[4..]).map_err(|e| crate::error::Error::Fpss { - kind: crate::error::FpssErrorKind::ProtocolError, - message: format!("failed to parse contract: {e}"), - })?; - - Ok((contract_id, contract)) -} - -// --------------------------------------------------------------------------- -// Which message code to use for subscribe/unsubscribe -// --------------------------------------------------------------------------- - -/// Returns the `StreamMsgType` code for subscribing to a given data type. -/// -/// Source: `PacketStream.addQuote()` uses code 21, `addTrade()` uses 22, -/// `addOpenInterest()` uses 23. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SubscriptionKind { - Quote, - Trade, - OpenInterest, -} - -impl SubscriptionKind { - /// Message code for subscribing (Client->Server). - #[must_use] - pub fn subscribe_code(self) -> StreamMsgType { - match self { - Self::Quote => StreamMsgType::Quote, - Self::Trade => StreamMsgType::Trade, - Self::OpenInterest => StreamMsgType::OpenInterest, - } - } - - /// Message code for unsubscribing (Client->Server). 
- #[must_use] - pub fn unsubscribe_code(self) -> StreamMsgType { - match self { - Self::Quote => StreamMsgType::RemoveQuote, - Self::Trade => StreamMsgType::RemoveTrade, - Self::OpenInterest => StreamMsgType::RemoveOpenInterest, - } - } -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - #[cfg(test)] mod tests { use super::*; @@ -1040,118 +721,6 @@ mod tests { assert_eq!(err, ContractParseError::InvalidSize(2)); } - #[test] - fn credentials_payload_format() { - let payload = build_credentials_payload("user@test.com", "pass123"); - assert_eq!(payload[0], 0x00); // version byte - let user_len = u16::from_be_bytes([payload[1], payload[2]]); - assert_eq!(user_len, 13); // "user@test.com".len() - assert_eq!(&payload[3..16], b"user@test.com"); - assert_eq!(&payload[16..], b"pass123"); - } - - #[test] - fn subscribe_payload_with_stock() { - let contract = Contract::stock("MSFT"); - let payload = build_subscribe_payload(42, &contract).expect("valid root"); - // req_id(4) + contract(1+1+4+1 = 7) = 11 - assert_eq!(payload.len(), 11); - let req_id = i32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]); - assert_eq!(req_id, 42); - // Rest is the contract bytes - let (parsed, _) = Contract::from_bytes(&payload[4..]).unwrap(); - assert_eq!(parsed, contract); - } - - #[test] - fn build_subscribe_payload_rejects_oversize_root() { - let contract = Contract::stock("ABCDEFGHIJKLMNOPQ"); // 17 chars - let err = build_subscribe_payload(1, &contract).expect_err("too-long root must error"); - match err { - Error::Config(msg) => assert!(msg.contains("too long")), - other => panic!("expected Error::Config, got {other:?}"), - } - } - - #[test] - fn build_subscribe_payload_rejects_empty_root() { - let contract = Contract::stock(""); - let err = build_subscribe_payload(1, &contract).expect_err("empty root must error"); - match err { - Error::Config(msg) 
=> assert!(msg.contains("empty")), - other => panic!("expected Error::Config, got {other:?}"), - } - } - - #[test] - fn full_type_subscribe_payload() { - let payload = build_full_type_subscribe_payload(99, SecType::Stock); - assert_eq!(payload.len(), 5); - let req_id = i32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]); - assert_eq!(req_id, 99); - assert_eq!(payload[4], SecType::Stock as u8); - } - - #[test] - fn parse_req_response_ok() { - let mut data = Vec::new(); - data.extend_from_slice(&42i32.to_be_bytes()); - data.extend_from_slice(&0i32.to_be_bytes()); // Subscribed - let (req_id, resp) = parse_req_response(&data).unwrap(); - assert_eq!(req_id, 42); - assert_eq!(resp, StreamResponseType::Subscribed); - } - - #[test] - fn parse_req_response_max_streams() { - let mut data = Vec::new(); - data.extend_from_slice(&1i32.to_be_bytes()); - data.extend_from_slice(&2i32.to_be_bytes()); // MaxStreamsReached - let (req_id, resp) = parse_req_response(&data).unwrap(); - assert_eq!(req_id, 1); - assert_eq!(resp, StreamResponseType::MaxStreamsReached); - } - - #[test] - fn parse_req_response_too_short() { - let data = [0u8; 7]; - let err = parse_req_response(&data).unwrap_err(); - assert!(err.to_string().contains("too short")); - } - - #[test] - fn parse_disconnect_reasons() { - let make = |code: i16| { - let bytes = code.to_be_bytes(); - parse_disconnect_reason(&bytes) - }; - - assert_eq!(make(0), RemoveReason::InvalidCredentials); - assert_eq!(make(6), RemoveReason::AccountAlreadyConnected); - assert_eq!(make(12), RemoveReason::TooManyRequests); - assert_eq!(make(15), RemoveReason::ServerRestarting); - assert_eq!(make(-99), RemoveReason::Unspecified); - } - - #[test] - fn parse_disconnect_reason_empty() { - assert_eq!(parse_disconnect_reason(&[]), RemoveReason::Unspecified); - } - - #[test] - fn parse_contract_message_stock() { - // Build a CONTRACT payload: 4-byte id + contract bytes - let contract = Contract::stock("TSLA"); - let contract_bytes = 
contract.to_bytes(); - let mut payload = Vec::new(); - payload.extend_from_slice(&7i32.to_be_bytes()); - payload.extend_from_slice(&contract_bytes); - - let (id, parsed) = parse_contract_message(&payload).unwrap(); - assert_eq!(id, 7); - assert_eq!(parsed, contract); - } - #[test] fn contract_display_stock() { assert_eq!(Contract::stock("AAPL").to_string(), "AAPL STOCK"); @@ -1163,40 +732,6 @@ mod tests { assert_eq!(c.to_string(), "SPY OPTION 20261218 P 45000"); } - #[test] - fn ping_payload() { - let p = build_ping_payload(); - assert_eq!(p, vec![0x00]); - } - - #[test] - fn subscription_kind_codes() { - assert_eq!( - SubscriptionKind::Quote.subscribe_code(), - StreamMsgType::Quote - ); - assert_eq!( - SubscriptionKind::Quote.unsubscribe_code(), - StreamMsgType::RemoveQuote - ); - assert_eq!( - SubscriptionKind::Trade.subscribe_code(), - StreamMsgType::Trade - ); - assert_eq!( - SubscriptionKind::Trade.unsubscribe_code(), - StreamMsgType::RemoveTrade - ); - assert_eq!( - SubscriptionKind::OpenInterest.subscribe_code(), - StreamMsgType::OpenInterest - ); - assert_eq!( - SubscriptionKind::OpenInterest.unsubscribe_code(), - StreamMsgType::RemoveOpenInterest - ); - } - // -- Java wire-format parity tests ----------------------------------------- // These verify byte-for-byte compatibility with Java's Contract.toBytes(). diff --git a/crates/thetadatadx/src/fpss/protocol/mod.rs b/crates/thetadatadx/src/fpss/protocol/mod.rs new file mode 100644 index 00000000..d9bfa556 --- /dev/null +++ b/crates/thetadatadx/src/fpss/protocol/mod.rs @@ -0,0 +1,118 @@ +//! FPSS message types, contract serialization, and subscription protocol. +//! +//! # Wire protocol (from decompiled Java) +//! +//! ## Message codes (`StreamMsgType` in Java) +//! +//! Source: `StreamMsgType.java` — enum with byte codes for each message direction. +//! See [`tdbe::types::enums::StreamMsgType`] for the Rust enum. +//! +//! ## Contract serialization (`Contract.java`) +//! +//! 
Contracts are serialized as a compact binary format on the wire: +//! +//! - **Stock/Index**: `[total_size: u8] [root_len: u8] [root ASCII] [sec_type: u8]` +//! - **Option**: `[total_size: u8] [root_len: u8] [root ASCII] [sec_type: u8] +//! [exp_date: i32 BE] [is_call: u8] [strike: i32 BE]` +//! +//! Source: `Contract.toBytes()` and `Contract.fromBytes()` in decompiled terminal. +//! +//! ## Authentication (`FPSSClient.java`) +//! +//! CREDENTIALS message (code 0) payload: +//! ```text +//! [0x00] [username_len: u16 BE] [username bytes] [password bytes] +//! ``` +//! +//! Source: `FPSSClient.sendCredentials()` in decompiled terminal. +//! +//! ## Subscription (`FPSSClient.java`, `PacketStream.java`) +//! +//! Subscribe payload: `[req_id: i32 BE] [contract bytes]` +//! Full-type subscribe: `[req_id: i32 BE] [sec_type: u8]` (5 bytes, subscribes all of that type) +//! Unsubscribe payload: same format as subscribe, using REMOVE_* codes. +//! +//! Response (code 40): `[req_id: i32 BE] [resp_code: i32 BE]` +//! - 0 = OK, 1 = ERROR, 2 = `MAX_STREAMS`, 3 = `INVALID_PERMS` +//! +//! Source: `PacketStream.addQuote()`, `PacketStream.removeQuote()`, +//! `FPSSClient.onReqResponse()` in decompiled terminal. +//! +//! # Sub-modules +//! +//! - [`contract`] — `Contract` struct, OCC-21 parser, wire codec. +//! - [`wire`] — payload builders / parsers (credentials, subscribe, ping, stop, REQ_RESPONSE, CONTRACT, DISCONNECTED). +//! - [`subscription`] — `SubscriptionKind` enum (Quote / Trade / OpenInterest). + +pub mod contract; +pub mod subscription; +pub mod wire; + +pub use self::contract::{Contract, ContractParseError}; +pub use self::subscription::SubscriptionKind; +pub use self::wire::{ + build_credentials_payload, build_full_type_subscribe_payload, build_ping_payload, + build_stop_payload, build_subscribe_payload, parse_contract_message, parse_disconnect_reason, + parse_req_response, +}; + +/// Maximum payload size for a single FPSS frame (1-byte length field). 
+/// +/// Source: `PacketStream.java` — `LEN` field is a single unsigned byte. +pub const MAX_PAYLOAD: usize = 255; + +/// Ping interval in milliseconds. +/// +/// Source: `FPSSClient.java` — heartbeat thread sends PING every 100ms after login. +pub const PING_INTERVAL_MS: u64 = 100; + +/// Reconnect delay in milliseconds after `IOException`. +/// +/// Source: `FPSSClient.java` — `RECONNECT_DELAY_MS = 2000`. +pub const RECONNECT_DELAY_MS: u64 = 2_000; + +/// Delay before reconnecting after `TOO_MANY_REQUESTS` disconnect (milliseconds). +/// +/// Source: `FPSSClient.java` — waits 130 seconds on `RemoveReason.TOO_MANY_REQUESTS`. +pub const TOO_MANY_REQUESTS_DELAY_MS: u64 = 130_000; + +/// Socket connect timeout in milliseconds. +/// +/// Source: `FPSSClient.java` — `socket.connect(addr, 2000)`. +pub const CONNECT_TIMEOUT_MS: u64 = 2_000; + +/// Socket read timeout in milliseconds. +/// +/// Source: `FPSSClient.java` — `socket.setSoTimeout(10000)`. +pub const READ_TIMEOUT_MS: u64 = 10_000; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn frame_payload_cap_is_one_byte() { + // PacketStream.java uses a single-byte LEN field; cap MUST be 255. + assert_eq!(MAX_PAYLOAD, 255); + } + + #[test] + fn ping_interval_matches_java_heartbeat() { + // FPSSClient.java sends PING every 100ms after login. + assert_eq!(PING_INTERVAL_MS, 100); + } + + #[test] + fn reconnect_delays_match_java() { + // 2000ms general reconnect, 130s TOO_MANY_REQUESTS cooldown. + assert_eq!(RECONNECT_DELAY_MS, 2_000); + assert_eq!(TOO_MANY_REQUESTS_DELAY_MS, 130_000); + } + + #[test] + fn socket_timeouts_match_java() { + // FPSSClient.java: socket.connect(addr, 2000), setSoTimeout(10000). 
+ assert_eq!(CONNECT_TIMEOUT_MS, 2_000); + assert_eq!(READ_TIMEOUT_MS, 10_000); + } +} diff --git a/crates/thetadatadx/src/fpss/protocol/subscription.rs b/crates/thetadatadx/src/fpss/protocol/subscription.rs new file mode 100644 index 00000000..0b8161b1 --- /dev/null +++ b/crates/thetadatadx/src/fpss/protocol/subscription.rs @@ -0,0 +1,72 @@ +//! Subscription kind classification for FPSS subscribe / unsubscribe paths. +//! +//! Source: `PacketStream.addQuote()` uses code 21, `addTrade()` uses 22, +//! `addOpenInterest()` uses 23. + +use tdbe::types::enums::StreamMsgType; + +/// Returns the `StreamMsgType` code for subscribing to a given data type. +/// +/// Source: `PacketStream.addQuote()` uses code 21, `addTrade()` uses 22, +/// `addOpenInterest()` uses 23. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SubscriptionKind { + Quote, + Trade, + OpenInterest, +} + +impl SubscriptionKind { + /// Message code for subscribing (Client->Server). + #[must_use] + pub fn subscribe_code(self) -> StreamMsgType { + match self { + Self::Quote => StreamMsgType::Quote, + Self::Trade => StreamMsgType::Trade, + Self::OpenInterest => StreamMsgType::OpenInterest, + } + } + + /// Message code for unsubscribing (Client->Server). 
+ #[must_use] + pub fn unsubscribe_code(self) -> StreamMsgType { + match self { + Self::Quote => StreamMsgType::RemoveQuote, + Self::Trade => StreamMsgType::RemoveTrade, + Self::OpenInterest => StreamMsgType::RemoveOpenInterest, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn subscription_kind_codes() { + assert_eq!( + SubscriptionKind::Quote.subscribe_code(), + StreamMsgType::Quote + ); + assert_eq!( + SubscriptionKind::Quote.unsubscribe_code(), + StreamMsgType::RemoveQuote + ); + assert_eq!( + SubscriptionKind::Trade.subscribe_code(), + StreamMsgType::Trade + ); + assert_eq!( + SubscriptionKind::Trade.unsubscribe_code(), + StreamMsgType::RemoveTrade + ); + assert_eq!( + SubscriptionKind::OpenInterest.subscribe_code(), + StreamMsgType::OpenInterest + ); + assert_eq!( + SubscriptionKind::OpenInterest.unsubscribe_code(), + StreamMsgType::RemoveOpenInterest + ); + } +} diff --git a/crates/thetadatadx/src/fpss/protocol/wire.rs b/crates/thetadatadx/src/fpss/protocol/wire.rs new file mode 100644 index 00000000..51b22738 --- /dev/null +++ b/crates/thetadatadx/src/fpss/protocol/wire.rs @@ -0,0 +1,352 @@ +//! Wire-payload builders and parsers for FPSS messages. +//! +//! Builders cover the client->server direction (credentials, subscribe, +//! full-type subscribe, ping, stop). Parsers cover the server->client +//! responses (REQ_RESPONSE, DISCONNECTED, CONTRACT). +//! +//! Source: `FPSSClient.java`, `PacketStream.java` in decompiled terminal. + +use tdbe::types::enums::{RemoveReason, SecType, StreamResponseType}; + +use super::contract::Contract; +use crate::error::Error; + +// --------------------------------------------------------------------------- +// Credentials payload +// --------------------------------------------------------------------------- + +/// Build the CREDENTIALS (code 0) message payload. 
+/// +/// # Wire format (from `FPSSClient.sendCredentials()`) +/// +/// ```text +/// [0x00] [username_len: u16 BE] [username bytes] [password bytes] +/// ``` +/// +/// The leading 0x00 byte is a version/flag byte present in the Java source. +/// `username_len` is the byte-length of the username (email), as a big-endian u16. +/// Password bytes follow immediately with no length prefix — the server infers +/// password length from `payload_len - 3 - username_len`. +#[must_use] +pub fn build_credentials_payload(username: &str, password: &str) -> Vec { + let user_bytes = username.as_bytes(); + let pass_bytes = password.as_bytes(); + + // Match Java's `putShort((byte)len)` behavior: the length is first narrowed + // to a byte (i8), then sign-extended to a short (i16). For lengths 0-127 + // this is identical to a plain u16 cast. For lengths 128-255 the sign + // extension sets the high byte to 0xFF. In practice usernames are always + // <128 bytes, but we match the exact wire encoding for correctness. + // Truncation to i8 is intentional: matches Java putShort((byte)len) wire encoding. + #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] + let user_len = i16::from(user_bytes.len() as i8); + + // 1 (version) + 2 (user_len) + user + pass + let mut buf = Vec::with_capacity(3 + user_bytes.len() + pass_bytes.len()); + buf.push(0x00); // version/flag byte + buf.extend_from_slice(&user_len.to_be_bytes()); + buf.extend_from_slice(user_bytes); + buf.extend_from_slice(pass_bytes); + buf +} + +// --------------------------------------------------------------------------- +// Subscription payloads +// --------------------------------------------------------------------------- + +/// Build a subscription payload for a specific contract. 
+/// +/// # Wire format (from `PacketStream.addQuote()` / `PacketStream.addTrade()`) +/// +/// ```text +/// [req_id: i32 BE] [contract bytes] +/// ``` +/// +/// The message code (21=QUOTE, 22=TRADE, `23=OPEN_INTEREST`) is set by the caller +/// in the frame header; this function only builds the payload. +/// +/// # Errors +/// +/// Returns [`Error::Config`] if the contract root is empty or longer +/// than 16 bytes, surfacing the Java-parity invariant from +/// [`Contract::try_to_bytes`]. +pub fn build_subscribe_payload(req_id: i32, contract: &Contract) -> Result, Error> { + let contract_bytes = contract.try_to_bytes()?; + let mut buf = Vec::with_capacity(4 + contract_bytes.len()); + buf.extend_from_slice(&req_id.to_be_bytes()); + buf.extend_from_slice(&contract_bytes); + Ok(buf) +} + +/// Build a full-type subscription payload (subscribe to all contracts of a security type). +/// +/// # Wire format (from `PacketStream.java`) +/// +/// ```text +/// [req_id: i32 BE] [sec_type: u8] +/// ``` +/// +/// Total 5 bytes. The server uses the 5-byte length to distinguish this from +/// a per-contract subscription (which is always longer). +#[must_use] +pub fn build_full_type_subscribe_payload(req_id: i32, sec_type: SecType) -> Vec { + let mut buf = Vec::with_capacity(5); + buf.extend_from_slice(&req_id.to_be_bytes()); + buf.push(sec_type as u8); + buf +} + +/// Build the PING (code 10) payload. +/// +/// Source: `FPSSClient.java` — heartbeat sends 1-byte zero payload every 100ms. +#[must_use] +pub fn build_ping_payload() -> Vec { + vec![0x00] +} + +/// Build the STOP (code 32) payload sent by the client on shutdown. +/// +/// Source: `FPSSClient.java` — `sendStop()` sends empty-ish STOP message. 
+#[must_use] +pub fn build_stop_payload() -> Vec { + vec![0x00] +} + +// --------------------------------------------------------------------------- +// Response parsing +// --------------------------------------------------------------------------- + +/// Parse a `REQ_RESPONSE` (code 40) payload. +/// +/// # Wire format (from `FPSSClient.onReqResponse()`) +/// +/// ```text +/// [req_id: i32 BE] [resp_code: i32 BE] +/// ``` +/// +/// Returns `(req_id, response_type)`. +/// # Errors +/// +/// Returns an error on network, authentication, or parsing failure. +pub fn parse_req_response(payload: &[u8]) -> Result<(i32, StreamResponseType), Error> { + if payload.len() < 8 { + return Err(Error::Fpss { + kind: crate::error::FpssErrorKind::ProtocolError, + message: format!( + "REQ_RESPONSE payload too short: {} bytes, expected 8", + payload.len() + ), + }); + } + + let req_id = i32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]); + let resp_code = i32::from_be_bytes([payload[4], payload[5], payload[6], payload[7]]); + + let resp_type = match resp_code { + 0 => StreamResponseType::Subscribed, + 1 => StreamResponseType::Error, + 2 => StreamResponseType::MaxStreamsReached, + 3 => StreamResponseType::InvalidPerms, + _ => { + return Err(Error::Fpss { + kind: crate::error::FpssErrorKind::ProtocolError, + message: format!("unknown REQ_RESPONSE code: {resp_code}"), + }); + } + }; + + Ok((req_id, resp_type)) +} + +/// Parse a DISCONNECTED (code 12) payload. +/// +/// # Wire format (from `FPSSClient.java`) +/// +/// ```text +/// [reason: i16 BE] +/// ``` +/// +/// 2-byte big-endian `RemoveReason` code. 
+#[must_use] +pub fn parse_disconnect_reason(payload: &[u8]) -> RemoveReason { + if payload.len() < 2 { + return RemoveReason::Unspecified; + } + let code = i16::from_be_bytes([payload[0], payload[1]]); + match code { + 0 => RemoveReason::InvalidCredentials, + 1 => RemoveReason::InvalidLoginValues, + 2 => RemoveReason::InvalidLoginSize, + 3 => RemoveReason::GeneralValidationError, + 4 => RemoveReason::TimedOut, + 5 => RemoveReason::ClientForcedDisconnect, + 6 => RemoveReason::AccountAlreadyConnected, + 7 => RemoveReason::SessionTokenExpired, + 8 => RemoveReason::InvalidSessionToken, + 9 => RemoveReason::FreeAccount, + 12 => RemoveReason::TooManyRequests, + 13 => RemoveReason::NoStartDate, + 14 => RemoveReason::LoginTimedOut, + 15 => RemoveReason::ServerRestarting, + 16 => RemoveReason::SessionTokenNotFound, + 17 => RemoveReason::ServerUserDoesNotExist, + 18 => RemoveReason::InvalidCredentialsNullUser, + _ => RemoveReason::Unspecified, + } +} + +/// Parse a CONTRACT (code 20) payload. +/// +/// # Wire format (from `FPSSClient.onContract()`) +/// +/// ```text +/// [contract_id: i32 BE] [contract bytes...] +/// ``` +/// +/// The server assigns a numeric `contract_id` used to identify this contract +/// in subsequent QUOTE/TRADE/OHLCVC data messages. The contract bytes use the +/// same serialization as `Contract::to_bytes()`. +/// +/// Returns `(server_assigned_id, contract)`. +/// # Errors +/// +/// Returns an error on network, authentication, or parsing failure. 
+pub fn parse_contract_message(payload: &[u8]) -> Result<(i32, Contract), Error> { + if payload.len() < 5 { + return Err(Error::Fpss { + kind: crate::error::FpssErrorKind::ProtocolError, + message: format!("CONTRACT payload too short: {} bytes", payload.len()), + }); + } + + let contract_id = i32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]); + let (contract, _consumed) = Contract::from_bytes(&payload[4..]).map_err(|e| Error::Fpss { + kind: crate::error::FpssErrorKind::ProtocolError, + message: format!("failed to parse contract: {e}"), + })?; + + Ok((contract_id, contract)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn credentials_payload_format() { + let payload = build_credentials_payload("user@test.com", "pass123"); + assert_eq!(payload[0], 0x00); // version byte + let user_len = u16::from_be_bytes([payload[1], payload[2]]); + assert_eq!(user_len, 13); // "user@test.com".len() + assert_eq!(&payload[3..16], b"user@test.com"); + assert_eq!(&payload[16..], b"pass123"); + } + + #[test] + fn subscribe_payload_with_stock() { + let contract = Contract::stock("MSFT"); + let payload = build_subscribe_payload(42, &contract).expect("valid root"); + // req_id(4) + contract(1+1+4+1 = 7) = 11 + assert_eq!(payload.len(), 11); + let req_id = i32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]); + assert_eq!(req_id, 42); + // Rest is the contract bytes + let (parsed, _) = Contract::from_bytes(&payload[4..]).unwrap(); + assert_eq!(parsed, contract); + } + + #[test] + fn build_subscribe_payload_rejects_oversize_root() { + let contract = Contract::stock("ABCDEFGHIJKLMNOPQ"); // 17 chars + let err = build_subscribe_payload(1, &contract).expect_err("too-long root must error"); + match err { + Error::Config(msg) => assert!(msg.contains("too long")), + other => panic!("expected Error::Config, got {other:?}"), + } + } + + #[test] + fn build_subscribe_payload_rejects_empty_root() { + let contract = Contract::stock(""); + let err = 
build_subscribe_payload(1, &contract).expect_err("empty root must error"); + match err { + Error::Config(msg) => assert!(msg.contains("empty")), + other => panic!("expected Error::Config, got {other:?}"), + } + } + + #[test] + fn full_type_subscribe_payload() { + let payload = build_full_type_subscribe_payload(99, SecType::Stock); + assert_eq!(payload.len(), 5); + let req_id = i32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]); + assert_eq!(req_id, 99); + assert_eq!(payload[4], SecType::Stock as u8); + } + + #[test] + fn parse_req_response_ok() { + let mut data = Vec::new(); + data.extend_from_slice(&42i32.to_be_bytes()); + data.extend_from_slice(&0i32.to_be_bytes()); // Subscribed + let (req_id, resp) = parse_req_response(&data).unwrap(); + assert_eq!(req_id, 42); + assert_eq!(resp, StreamResponseType::Subscribed); + } + + #[test] + fn parse_req_response_max_streams() { + let mut data = Vec::new(); + data.extend_from_slice(&1i32.to_be_bytes()); + data.extend_from_slice(&2i32.to_be_bytes()); // MaxStreamsReached + let (req_id, resp) = parse_req_response(&data).unwrap(); + assert_eq!(req_id, 1); + assert_eq!(resp, StreamResponseType::MaxStreamsReached); + } + + #[test] + fn parse_req_response_too_short() { + let data = [0u8; 7]; + let err = parse_req_response(&data).unwrap_err(); + assert!(err.to_string().contains("too short")); + } + + #[test] + fn parse_disconnect_reasons() { + let make = |code: i16| { + let bytes = code.to_be_bytes(); + parse_disconnect_reason(&bytes) + }; + + assert_eq!(make(0), RemoveReason::InvalidCredentials); + assert_eq!(make(6), RemoveReason::AccountAlreadyConnected); + assert_eq!(make(12), RemoveReason::TooManyRequests); + assert_eq!(make(15), RemoveReason::ServerRestarting); + assert_eq!(make(-99), RemoveReason::Unspecified); + } + + #[test] + fn parse_disconnect_reason_empty() { + assert_eq!(parse_disconnect_reason(&[]), RemoveReason::Unspecified); + } + + #[test] + fn parse_contract_message_stock() { + // Build a 
CONTRACT payload: 4-byte id + contract bytes + let contract = Contract::stock("TSLA"); + let contract_bytes = contract.to_bytes(); + let mut payload = Vec::new(); + payload.extend_from_slice(&7i32.to_be_bytes()); + payload.extend_from_slice(&contract_bytes); + + let (id, parsed) = parse_contract_message(&payload).unwrap(); + assert_eq!(id, 7); + assert_eq!(parsed, contract); + } + + #[test] + fn ping_payload() { + let p = build_ping_payload(); + assert_eq!(p, vec![0x00]); + } +} diff --git a/crates/thetadatadx/src/lib.rs b/crates/thetadatadx/src/lib.rs index f39eb7bf..a2ee7959 100644 --- a/crates/thetadatadx/src/lib.rs +++ b/crates/thetadatadx/src/lib.rs @@ -93,7 +93,6 @@ pub mod auth; pub mod config; -pub mod decode; pub mod endpoint; pub mod error; pub mod flatfiles; @@ -115,6 +114,12 @@ mod macros; pub mod mdds; +// `decode` is re-exported from `mdds::decode` to preserve the public surface +// (`thetadatadx::decode::*`). Wave 2 split the original decode.rs god-file +// into `mdds/decode/{error, headers, transport, extract, cell, v3}`; the +// re-export keeps existing consumer paths unchanged. +pub use mdds::decode; + /// Generated protobuf types from `mdds.proto` (package `BetaEndpoints`). #[allow(clippy::pedantic)] pub mod proto { diff --git a/crates/thetadatadx/src/macros.rs b/crates/thetadatadx/src/macros.rs index bdc9ffb7..c6363c5b 100644 --- a/crates/thetadatadx/src/macros.rs +++ b/crates/thetadatadx/src/macros.rs @@ -197,7 +197,7 @@ macro_rules! list_endpoint { let _metrics_start = std::time::Instant::now(); let _permit = self.request_semaphore.acquire().await .map_err(|_| Error::Config("request semaphore closed".into()))?; - let policy = self.config().retry_policy; + let policy = self.config().retry; let budget = policy.max_attempts.max(1); let mut refreshed_already = false; let mut last_err: Option = None; @@ -334,7 +334,7 @@ macro_rules! 
parsed_endpoint { let _metrics_start = std::time::Instant::now(); let _permit = client.request_semaphore.acquire().await .map_err(|_| Error::Config("request semaphore closed".into()))?; - let policy = client.config().retry_policy; + let policy = client.config().retry; let budget = policy.max_attempts.max(1); let mut refreshed_already = false; let mut last_err: Option = None; diff --git a/crates/thetadatadx/src/mdds/client.rs b/crates/thetadatadx/src/mdds/client.rs index 64a22258..696195fd 100644 --- a/crates/thetadatadx/src/mdds/client.rs +++ b/crates/thetadatadx/src/mdds/client.rs @@ -90,10 +90,10 @@ impl MddsClient { /// Returns an error on network, authentication, or parsing failure. pub async fn connect(creds: &Credentials, config: DirectConfig) -> Result { // Step 1: Authenticate against Nexus API using the configured URL - // (env-var / builder overridable). `config.nexus_url` already + // (env-var / builder overridable). `config.auth.nexus_url` already // reflects that precedence via `DirectConfig::production()`. - tracing::info!(nexus_url = %config.nexus_url, "authenticating with Nexus API"); - let auth_resp = auth::authenticate_at(&config.nexus_url, creds).await?; + tracing::info!(nexus_url = %config.auth.nexus_url, "authenticating with Nexus API"); + let auth_resp = auth::authenticate_at(&config.auth.nexus_url, creds).await?; let session_uuid = auth_resp.session_id.clone(); tracing::debug!( @@ -108,17 +108,17 @@ impl MddsClient { let endpoint = tonic::transport::Channel::from_shared(mdds_uri.clone()) .map_err(|e| Error::Config(format!("invalid MDDS URI '{mdds_uri}': {e}")))? 
- .keep_alive_timeout(Duration::from_secs(config.mdds_keepalive_timeout_secs)) - .http2_keep_alive_interval(Duration::from_secs(config.mdds_keepalive_secs)) + .keep_alive_timeout(Duration::from_secs(config.mdds.keepalive_timeout_secs)) + .http2_keep_alive_interval(Duration::from_secs(config.mdds.keepalive_secs)) .initial_stream_window_size( - u32::try_from(config.mdds_window_size_kb * 1024).unwrap_or(u32::MAX), + u32::try_from(config.mdds.window_size_kb * 1024).unwrap_or(u32::MAX), ) .initial_connection_window_size( - u32::try_from(config.mdds_connection_window_size_kb * 1024).unwrap_or(u32::MAX), + u32::try_from(config.mdds.connection_window_size_kb * 1024).unwrap_or(u32::MAX), ) - .connect_timeout(Duration::from_secs(10)); + .connect_timeout(Duration::from_secs(config.mdds.connect_timeout_secs)); - let endpoint = if config.mdds_tls { + let endpoint = if config.mdds.tls { endpoint.tls_config(tonic::transport::ClientTlsConfig::new().with_enabled_roots())? } else { endpoint @@ -134,28 +134,28 @@ impl MddsClient { // Auto-detect concurrency from subscription tier when config is 0. // Source: Java terminal uses 2^subscription_tier (FREE=1, VALUE=2, STANDARD=4, PRO=8). 
- let concurrent = if config.mdds_concurrent_requests == 0 { + let concurrent = if config.mdds.concurrent_requests == 0 { auth_resp .user .as_ref() .map_or(2, crate::auth::nexus::AuthUser::max_concurrent_requests) } else { - config.mdds_concurrent_requests + config.mdds.concurrent_requests }; let request_semaphore = Arc::new(tokio::sync::Semaphore::new(concurrent)); tracing::debug!( mdds_concurrent_requests = concurrent, - auto_detected = config.mdds_concurrent_requests == 0, + auto_detected = config.mdds.concurrent_requests == 0, "request semaphore initialized" ); let stock_tier = auth_resp.user.as_ref().and_then(|u| u.stock_subscription); let options_tier = auth_resp.user.as_ref().and_then(|u| u.options_subscription); - let session = SessionToken::new(session_uuid, config.nexus_url.clone(), creds.clone()); - let client_type = config.client_type.clone(); + let session = SessionToken::new(session_uuid, config.auth.nexus_url.clone(), creds.clone()); + let client_type = config.auth.client_type.clone(); Ok(Self { session, @@ -214,7 +214,7 @@ impl MddsClient { BetaThetaTerminalClient::new(self.channel.clone()) // MDDS can return large DataTables (e.g. full day of trades). // Uses the config-specified max message size. - .max_decoding_message_size(self.config.mdds_max_message_size) + .max_decoding_message_size(self.config.mdds.max_message_size) } /// Return a reference to the underlying config for diagnostics. diff --git a/crates/thetadatadx/src/mdds/decode/cell.rs b/crates/thetadatadx/src/mdds/decode/cell.rs new file mode 100644 index 00000000..41cc01c2 --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/cell.rs @@ -0,0 +1,304 @@ +//! Per-cell strict decoders (`row_*`) and the generated parser surface. +//! +//! Each `row_*` function dispatches on the cell's wire type rather than +//! coalescing silently — wire-protocol anomalies (`DataValue` with the +//! `data_type` oneof unset) surface as +//! 
[`DecodeError::TypeMismatch { observed: "Unset" }`] rather than collapsing +//! to a default value. +//! +//! The macro-generated `decode_generated` module (assembled by `build.rs` from +//! `tick_schema.toml`) is included from this module; its emitted parser +//! functions reference `crate::decode::*` for the cross-cutting helpers and +//! `tdbe::time::*` for Eastern-time conversion. + +use super::error::observed_name; +use super::error::DecodeError; +use super::headers::find_header; +use super::v3::parse_iso_date; +use crate::proto; +use tdbe::types::tick::{ + CalendarDay, EodTick, GreeksAllTick, GreeksFirstOrderTick, GreeksSecondOrderTick, + GreeksThirdOrderTick, InterestRateTick, IvTick, MarketValueTick, OhlcTick, OpenInterestTick, + OptionContract, PriceTick, QuoteTick, TradeQuoteTick, TradeTick, +}; + +/// Extract a date (YYYYMMDD) from a `Number` or `Timestamp` cell, strictly. +/// +/// Used by generated parsers when the `date` field maps to a `timestamp` column. +/// `Number` carries the date already in YYYYMMDD form; `Timestamp` is converted +/// to an Eastern-Time YYYYMMDD integer. `NullValue` yields `Ok(None)`; any +/// other type yields `Err(TypeMismatch)`. +/// +/// # Errors +/// +/// Returns [`DecodeError::TypeMismatch`] if the cell is neither a `Number`, +/// `Timestamp`, nor `NullValue` — including the case where the `DataValue` +/// arrived with its `data_type` oneof unset (`observed: "Unset"`), which is a +/// wire-protocol anomaly we fail loud on. Returns [`DecodeError::MissingCell`] +/// only when the row has fewer cells than `idx` (index out of bounds). +// Reason: number values from protobuf fit in i32 for date/integer fields. 
+#[allow(clippy::cast_possible_truncation)]
+pub(crate) fn row_date(row: &proto::DataValueList, idx: usize) -> Result<Option<i32>, DecodeError> {
+    let Some(dv) = row.values.get(idx) else {
+        return Err(DecodeError::MissingCell { column: idx });
+    };
+    match dv.data_type.as_ref() {
+        Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as i32)),
+        Some(proto::data_value::DataType::Timestamp(ts)) => {
+            Ok(Some(tdbe::time::timestamp_to_date(ts.epoch_ms)))
+        }
+        Some(proto::data_value::DataType::NullValue(_)) => Ok(None),
+        other => Err(DecodeError::TypeMismatch {
+            column: idx,
+            expected: "Number|Timestamp",
+            observed: observed_name(other),
+        }),
+    }
+}
+
+/// Decode an `i32`-valued cell with Java-matching strict semantics.
+///
+/// Accepts:
+/// - `Number(n)` → `Ok(Some(n as i32))`.
+/// - `Timestamp(ts)` → `Ok(Some(ms_of_day))` — v3 MDDS sends time columns as
+///   proto `Timestamp`; the parser expects milliseconds-of-day in Eastern Time.
+/// - `NullValue` → `Ok(None)`, matching Java `null` return.
+///
+/// Any other variant produces [`DecodeError::TypeMismatch`], including the
+/// case where the `DataValue` arrived with its `data_type` oneof unset
+/// (`observed: "Unset"`) — a wire anomaly we fail loud on. A row shorter than
+/// `idx` (index out of bounds) produces [`DecodeError::MissingCell`].
+///
+/// # Errors
+///
+/// See variant list above.
+// Reason: protocol-defined integer widths from Java FPSS specification.
+#[allow(clippy::cast_possible_truncation)]
+pub(crate) fn row_number(
+    row: &proto::DataValueList,
+    idx: usize,
+) -> Result<Option<i32>, DecodeError> {
+    let Some(dv) = row.values.get(idx) else {
+        return Err(DecodeError::MissingCell { column: idx });
+    };
+    match dv.data_type.as_ref() {
+        Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as i32)),
+        Some(proto::data_value::DataType::Timestamp(ts)) => {
+            Ok(Some(tdbe::time::timestamp_to_ms_of_day(ts.epoch_ms)))
+        }
+        Some(proto::data_value::DataType::NullValue(_)) => Ok(None),
+        other => Err(DecodeError::TypeMismatch {
+            column: idx,
+            expected: "Number|Timestamp",
+            observed: observed_name(other),
+        }),
+    }
+}
+
+/// Extract raw price value from a `Price` cell (test-only helper).
+///
+/// `Price(p)` → `Ok(Some(p.value))`; `NullValue` → `Ok(None)`; other types
+/// error. Missing cell errors.
+///
+/// # Errors
+///
+/// See [`row_number`].
+#[cfg(test)]
+#[allow(clippy::cast_possible_truncation)]
+pub(crate) fn row_price_value(
+    row: &proto::DataValueList,
+    idx: usize,
+) -> Result<Option<i32>, DecodeError> {
+    let Some(dv) = row.values.get(idx) else {
+        return Err(DecodeError::MissingCell { column: idx });
+    };
+    match dv.data_type.as_ref() {
+        Some(proto::data_value::DataType::Price(p)) => Ok(Some(p.value)),
+        Some(proto::data_value::DataType::NullValue(_)) => Ok(None),
+        other => Err(DecodeError::TypeMismatch {
+            column: idx,
+            expected: "Price",
+            observed: observed_name(other),
+        }),
+    }
+}
+
+/// Extract raw price type from a `Price` cell (test-only helper).
+///
+/// # Errors
+///
+/// See [`row_price_value`].
+#[cfg(test)]
+#[allow(clippy::cast_possible_truncation)]
+pub(crate) fn row_price_type(
+    row: &proto::DataValueList,
+    idx: usize,
+) -> Result<Option<i32>, DecodeError> {
+    let Some(dv) = row.values.get(idx) else {
+        return Err(DecodeError::MissingCell { column: idx });
+    };
+    match dv.data_type.as_ref() {
+        Some(proto::data_value::DataType::Price(p)) => Ok(Some(p.r#type)),
+        Some(proto::data_value::DataType::NullValue(_)) => Ok(None),
+        other => Err(DecodeError::TypeMismatch {
+            column: idx,
+            expected: "Price",
+            observed: observed_name(other),
+        }),
+    }
+}
+
+/// Decode a price-valued cell to `f64`, using the cell's own `price_type`.
+///
+/// Accepts both `Price` (the schema type) and `Number` — v3 MDDS occasionally
+/// sends whole-dollar quantities as plain `Number` cells where the schema
+/// would otherwise expect `Price`. `NullValue` returns `Ok(None)`.
+///
+/// # Errors
+///
+/// Errors on any other cell type or missing cell.
+// Reason: protocol-defined integer widths from Java FPSS specification.
+#[allow(clippy::cast_possible_truncation)]
+pub(crate) fn row_price_f64(
+    row: &proto::DataValueList,
+    idx: usize,
+) -> Result<Option<f64>, DecodeError> {
+    let Some(dv) = row.values.get(idx) else {
+        return Err(DecodeError::MissingCell { column: idx });
+    };
+    match dv.data_type.as_ref() {
+        Some(proto::data_value::DataType::Price(p)) => Ok(Some(
+            tdbe::types::price::Price::new(p.value, p.r#type).to_f64(),
+        )),
+        Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as f64)),
+        Some(proto::data_value::DataType::NullValue(_)) => Ok(None),
+        other => Err(DecodeError::TypeMismatch {
+            column: idx,
+            expected: "Price|Number",
+            observed: observed_name(other),
+        }),
+    }
+}
+
+/// Decode a text-valued cell.
+///
+/// `Text(s)` → `Ok(Some(s))`, `NullValue` → `Ok(None)`.
+///
+/// # Errors
+///
+/// Errors on any other cell type or missing cell.
+pub(crate) fn row_text(
+    row: &proto::DataValueList,
+    idx: usize,
+) -> Result<Option<String>, DecodeError> {
+    let Some(dv) = row.values.get(idx) else {
+        return Err(DecodeError::MissingCell { column: idx });
+    };
+    match dv.data_type.as_ref() {
+        Some(proto::data_value::DataType::Text(s)) => Ok(Some(s.clone())),
+        Some(proto::data_value::DataType::NullValue(_)) => Ok(None),
+        other => Err(DecodeError::TypeMismatch {
+            column: idx,
+            expected: "Text",
+            observed: observed_name(other),
+        }),
+    }
+}
+
+/// Decode an `i64`-valued cell.
+///
+/// `Number(n)` → `Ok(Some(n))`; `Price(p)` → scaled with i64-native
+/// arithmetic (no f64 hop), so values past `2^53` round-trip bit-exact;
+/// `NullValue` → `Ok(None)`.
+///
+/// Used by the generated parsers for schema columns typed `i64` — added
+/// with the EodTick `volume`/`count` widening (where on high-volume
+/// symbols the values exceed `i32::MAX`).
+///
+/// `price_type` is clamped to `0..=19` to match
+/// [`tdbe::types::price::Price::new`], so the same wire cell decodes
+/// identically through this function and [`row_price_f64`].
+///
+/// # Errors
+///
+/// Returns `DecodeError::TypeMismatch` for any other cell variant. Returns
+/// `DecodeError::MissingCell` for an out-of-bounds column index. Under the
+/// clamped `0..=19` price-type contract, scale-up cannot overflow `i64`
+/// (max product is `i32::MAX * 10^9 ≈ 2.15e18`, well under `i64::MAX`).
+pub(crate) fn row_number_i64(
+    row: &proto::DataValueList,
+    idx: usize,
+) -> Result<Option<i64>, DecodeError> {
+    let Some(dv) = row.values.get(idx) else {
+        return Err(DecodeError::MissingCell { column: idx });
+    };
+    match dv.data_type.as_ref() {
+        Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n)),
+        Some(proto::data_value::DataType::Price(p)) => {
+            // Vendor convention: real_value = value * 10^(type - 10).
+            // Clamp `type` to 0..=19 to match `tdbe::Price::new`, so the
+            // same wire cell decodes identically through `row_price_f64`
+            // and `row_number_i64`. Positive exp scales up; negative exp
+            // scales down. v == 0 short-circuits to 0 so a zero price
+            // never trips the scale-up overflow guard.
+            let v = i64::from(p.value);
+            if v == 0 {
+                return Ok(Some(0));
+            }
+            let price_type = p.r#type.clamp(0, 19);
+            let exp = price_type - 10;
+            // After clamping, exp ∈ [-10, 9]. Scale-up: i32::MAX * 10^9
+            // ≈ 2.147e18 < i64::MAX (≈ 9.22e18), so checked_mul cannot
+            // overflow. checked_mul preserves the contract anyway.
+            let scaled = if exp >= 0 {
+                10i64
+                    .checked_pow(exp.unsigned_abs())
+                    .and_then(|m| v.checked_mul(m))
+            } else {
+                Some(v / 10i64.pow(exp.unsigned_abs()))
+            };
+            match scaled {
+                Some(n) => Ok(Some(n)),
+                None => Err(DecodeError::TypeMismatch {
+                    column: idx,
+                    expected: "i64-fitting Price",
+                    observed: "Price overflowing i64",
+                }),
+            }
+        }
+        Some(proto::data_value::DataType::NullValue(_)) => Ok(None),
+        other => Err(DecodeError::TypeMismatch {
+            column: idx,
+            expected: "Number|Price",
+            observed: observed_name(other),
+        }),
+    }
+}
+
+/// Borrow the cell at `idx`, returning an error if the row is too short.
+pub(crate) fn cell_type(
+    row: &proto::DataValueList,
+    idx: usize,
+) -> Result<Option<&proto::data_value::DataType>, DecodeError> {
+    let Some(dv) = row.values.get(idx) else {
+        return Err(DecodeError::MissingCell { column: idx });
+    };
+    Ok(dv.data_type.as_ref())
+}
+
+// Generated code -- parser functions from tick_schema.toml by build.rs.
+//
+// The emitted parser bodies reference:
+//   * `crate::proto::*` for wire types
+//   * `crate::decode::{observed_name, parse_iso_date, ...}` for shared helpers
+//   * `tdbe::time::{timestamp_to_ms_of_day, timestamp_to_date}` for ET conversion
+//
+// All of these resolve through the re-exports in `crate::mdds::decode` (which
+// `crate::decode` re-exports at the crate root) so the generator's path
+// assumptions remain intact after the split.
+#[allow(clippy::pedantic)] // Reason: auto-generated parser code, not under our control.
+mod decode_generated { + use super::*; + include!(concat!(env!("OUT_DIR"), "/decode_generated.rs")); +} +pub use decode_generated::*; diff --git a/crates/thetadatadx/src/mdds/decode/error.rs b/crates/thetadatadx/src/mdds/decode/error.rs new file mode 100644 index 00000000..20459021 --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/error.rs @@ -0,0 +1,74 @@ +//! Per-cell decode errors and `DataType` variant naming for diagnostics. +//! +//! Mirrors the Java terminal's `IllegalArgumentException` path in +//! `PojoMessageUtils.convert`. Schema-drift guards in the generated parsers +//! raise [`DecodeError::MissingRequiredHeader`] when an upstream column is +//! absent, and the streaming accumulator raises +//! [`DecodeError::ChunkHeaderDrift`] when a mid-stream chunk's header set +//! diverges from the first chunk's schema. + +use crate::proto; +use thiserror::Error as ThisError; + +/// Per-cell decode failure. Produced by the `row_*` helpers when a cell does +/// not match the column's declared type, or when the requested column index is +/// past the end of the row. Mirrors the Java terminal's `IllegalArgumentException` +/// path in `PojoMessageUtils.convert`. +#[derive(Debug, ThisError, PartialEq, Eq)] +pub enum DecodeError { + /// Cell exists but its `DataType` variant does not match the declared + /// schema for this column. + #[error("column {column}: expected {expected}, got {observed}")] + TypeMismatch { + column: usize, + expected: &'static str, + observed: &'static str, + }, + /// Row has fewer cells than the requested column index. + #[error("column {column}: missing cell")] + MissingCell { column: usize }, + /// A required header (declared in `tick_schema.toml` under + /// `required = [...]`) is absent from a non-empty `DataTable`. 
Emitted by + /// the generated parsers when the server has added or renamed the column — + /// surfacing this as an error is the only way to prevent silent data loss + /// when the upstream schema drifts (see `HEADER_ALIASES` for known + /// synonyms). Empty `DataTable`s (no rows) still return `Ok(vec![])` + /// because "no trades today" is a legitimate outcome. + #[error( + "required column `{header}` missing from {rows}-row DataTable; \ + available headers: {available}" + )] + MissingRequiredHeader { + header: &'static str, + rows: usize, + available: String, + }, + /// A mid-stream gRPC chunk carries a header set that does not match the + /// header set established by the first chunk. The stream accumulator + /// used to silently retain the first header set and accumulate rows + /// from every chunk underneath it, which would transparently corrupt + /// a row set if the server's wire schema changed mid-response. This + /// variant surfaces the drift instead of hiding it. + #[error( + "chunk {chunk_index} headers drifted from first-chunk schema; \ + first: [{first}]; chunk: [{chunk}]" + )] + ChunkHeaderDrift { + chunk_index: usize, + first: String, + chunk: String, + }, +} + +/// Name the `DataType` variant for error messages. `None` is treated as a +/// missing `data_type` oneof (protobuf cell with no variant set). 
+pub(crate) fn observed_name(dt: Option<&proto::data_value::DataType>) -> &'static str {
+    match dt {
+        Some(proto::data_value::DataType::Number(_)) => "Number",
+        Some(proto::data_value::DataType::Text(_)) => "Text",
+        Some(proto::data_value::DataType::Price(_)) => "Price",
+        Some(proto::data_value::DataType::Timestamp(_)) => "Timestamp",
+        Some(proto::data_value::DataType::NullValue(_)) => "NullValue",
+        None => "Unset",
+    }
+}
diff --git a/crates/thetadatadx/src/mdds/decode/extract.rs b/crates/thetadatadx/src/mdds/decode/extract.rs
new file mode 100644
index 00000000..c84cf9ae
--- /dev/null
+++ b/crates/thetadatadx/src/mdds/decode/extract.rs
@@ -0,0 +1,79 @@
+//! Column-extraction helpers (Number / Text / Price) over a `DataTable`.
+//!
+//! These helpers return `Vec<Option<T>>` keyed by the column header. They
+//! drive the macro-generated list endpoints in `crate::macros` and the
+//! Polars / Arrow column projections.
+
+use crate::proto;
+
+/// Extract a column of i64 values from a `DataTable` by header name.
+#[must_use]
+pub fn extract_number_column(table: &proto::DataTable, header: &str) -> Vec<Option<i64>> {
+    let Some(col_idx) = table.headers.iter().position(|h| h == header) else {
+        return vec![];
+    };
+
+    table
+        .data_table
+        .iter()
+        .map(|row| {
+            row.values
+                .get(col_idx)
+                .and_then(|dv| dv.data_type.as_ref())
+                .and_then(|dt| match dt {
+                    proto::data_value::DataType::Number(n) => Some(*n),
+                    _ => None,
+                })
+        })
+        .collect()
+}
+
+/// Extract a column of string values from a `DataTable` by header name.
+#[must_use]
+pub fn extract_text_column(table: &proto::DataTable, header: &str) -> Vec<Option<String>> {
+    let Some(col_idx) = table.headers.iter().position(|h| h == header) else {
+        return vec![];
+    };
+
+    table
+        .data_table
+        .iter()
+        .map(|row| {
+            row.values
+                .get(col_idx)
+                .and_then(|dv| dv.data_type.as_ref())
+                .and_then(|dt| match dt {
+                    proto::data_value::DataType::Text(s) => Some(s.clone()),
+                    proto::data_value::DataType::Number(n) => Some(n.to_string()),
+                    proto::data_value::DataType::Price(p) => {
+                        Some(format!("{}", tdbe::Price::new(p.value, p.r#type).to_f64()))
+                    }
+                    _ => None,
+                })
+        })
+        .collect()
+}
+
+/// Extract a column of Price values from a `DataTable` by header name.
+#[must_use]
+pub fn extract_price_column(table: &proto::DataTable, header: &str) -> Vec<Option<tdbe::Price>> {
+    let Some(col_idx) = table.headers.iter().position(|h| h == header) else {
+        return vec![];
+    };
+
+    table
+        .data_table
+        .iter()
+        .map(|row| {
+            row.values
+                .get(col_idx)
+                .and_then(|dv| dv.data_type.as_ref())
+                .and_then(|dt| match dt {
+                    proto::data_value::DataType::Price(p) => {
+                        Some(tdbe::Price::new(p.value, p.r#type))
+                    }
+                    _ => None,
+                })
+        })
+        .collect()
+}
diff --git a/crates/thetadatadx/src/mdds/decode/headers.rs b/crates/thetadatadx/src/mdds/decode/headers.rs
new file mode 100644
index 00000000..3ada8c64
--- /dev/null
+++ b/crates/thetadatadx/src/mdds/decode/headers.rs
@@ -0,0 +1,74 @@
+//! Header alias table and lookup helper.
+//!
+//! v3 MDDS uses different column names than the tick schema. [`HEADER_ALIASES`]
+//! maps schema names to their v3 equivalents so generated and hand-written
+//! parsers work with both the schema and v3 wire payloads.
+
+/// Header aliases: v3 MDDS uses different column names than the tick schema.
+/// This maps schema names to their v3 equivalents so parsers work with both.
+///
+/// Validated against a real v3 MDDS response capture (see
+/// `tests/fixtures/captures/`). Each entry is `(schema_name, server_name)`:
+/// `find_header("ms_of_day", h)` returns the index of the first matching
+/// server column in `h`.
+pub(crate) const HEADER_ALIASES: &[(&str, &str)] = &[
+    // Generic time column: MDDS sends a proto `Timestamp`, the tick schema
+    // models it as an i32 ms-of-day. `row_number` handles the conversion.
+    ("ms_of_day", "timestamp"),
+    ("ms_of_day", "created"),
+    // Combined trade + quote responses split the two time columns into
+    // `trade_timestamp` (the trade side → `ms_of_day`) and `quote_timestamp`
+    // (the quote side → `quote_ms_of_day`). Without these aliases the
+    // `TradeQuoteTick` parser falls through the required-header guard and
+    // produces an empty Vec on ~1M-row responses (P11).
+    ("ms_of_day", "trade_timestamp"),
+    ("quote_ms_of_day", "quote_timestamp"),
+    ("ms_of_day2", "timestamp2"),
+    ("ms_of_day2", "last_trade"),
+    ("date", "timestamp"),
+    ("date", "created"),
+    ("date", "trade_timestamp"),
+    // option_list_contracts returns "symbol" where the schema says "root"
+    ("root", "symbol"),
+    // v3 uses "implied_vol" where the schema says "implied_volatility"
+    ("implied_volatility", "implied_vol"),
+    // The vendor's per-order Greeks endpoints (`option_*_greeks_*_order`)
+    // and the `_greeks_all` / `_greeks_eod` endpoints publish the
+    // underlying snapshot timestamp as `underlying_timestamp`. The tick
+    // schema models it as `underlying_ms_of_day` so the wire conversion
+    // (Timestamp -> ms-of-day) flows through the standard `row_number`
+    // path without a per-tick parser branch.
+    ("underlying_ms_of_day", "underlying_timestamp"),
+];
+
+/// Helper: find a column index by name, with alias fallback.
+///
+/// The v3 MDDS server uses `timestamp` where the tick schema says `ms_of_day`.
+/// This function checks the primary name first, then falls back to known aliases.
+///
+/// Returns `None` silently when the header is absent — required-header
+/// guards in the generated parsers surface a typed
+/// [`super::error::DecodeError::MissingRequiredHeader`] for the must-have
+/// columns; optional columns missing from a subset response (e.g.
+/// `option_snapshot_greeks_third_order` returning only the third-order Greek
+/// columns from the `GreeksTick` union schema) are by design. Header drift
+/// can be observed at the `trace` level via `RUST_LOG=thetadatadx=trace`.
+pub(crate) fn find_header(headers: &[&str], name: &str) -> Option<usize> {
+    // Try exact match first.
+    if let Some(pos) = headers.iter().position(|&s| s == name) {
+        return Some(pos);
+    }
+    // Try aliases.
+    for &(schema_name, server_name) in HEADER_ALIASES {
+        if name == schema_name {
+            if let Some(pos) = headers.iter().position(|&s| s == server_name) {
+                return Some(pos);
+            }
+        }
+    }
+    tracing::trace!(
+        header = name,
+        "column header not present in DataTable (optional or subset response)"
+    );
+    None
+}
diff --git a/crates/thetadatadx/src/mdds/decode/mod.rs b/crates/thetadatadx/src/mdds/decode/mod.rs
new file mode 100644
index 00000000..70560bd0
--- /dev/null
+++ b/crates/thetadatadx/src/mdds/decode/mod.rs
@@ -0,0 +1,47 @@
+//! `ResponseData` → `DataTable` → tick decoders.
+//!
+//! Wave 2 split of the original `crates/thetadatadx/src/decode.rs` god-file:
+//!
+//! | Submodule    | Concern                                                       |
+//! |--------------|---------------------------------------------------------------|
+//! | [`error`]    | [`DecodeError`] enum + `observed_name` diagnostic helper      |
+//! | [`headers`]  | `HEADER_ALIASES` v3 ↔ schema map + `find_header` lookup       |
+//! | [`transport`]| `decompress_response` / `decode_data_table` zstd path         |
+//! | [`extract`]  | `extract_{number,text,price}_column` column projections       |
+//! | [`cell`]     | Per-cell strict decoders (`row_*`) + generated parser surface |
+//! | [`v3`]       | Hand-written v3 parsers (`parse_option_contracts_v3`, …)      |
+//!
+//! Public API surface is preserved at `thetadatadx::decode::*` via the
+//! crate-root re-export of this module. Eastern-time / DST primitives
+//! previously living here have moved to [`tdbe::time`] and are reused by
+//! the FPSS latency path.
+
+pub mod cell;
+pub mod error;
+pub mod extract;
+pub mod headers;
+pub mod transport;
+pub mod v3;
+
+pub use error::DecodeError;
+pub use extract::{extract_number_column, extract_price_column, extract_text_column};
+pub use transport::{decode_data_table, decompress_response};
+pub use v3::{
+    parse_calendar_days_v3, parse_option_contracts_v3, CALENDAR_STATUS_EARLY_CLOSE,
+    CALENDAR_STATUS_FULL_CLOSE, CALENDAR_STATUS_OPEN, CALENDAR_STATUS_UNKNOWN,
+    CALENDAR_STATUS_WEEKEND,
+};
+
+// Re-export the macro-generated parser functions (`parse_trade_ticks`,
+// `parse_eod_ticks`, etc.) at this module's top level so external consumers
+// (sdks/python, benches) can keep using `thetadatadx::decode::parse_*`.
+pub use cell::*;
+
+// `observed_name` is `pub(crate)` and intentionally not part of the public
+// surface; it stays accessible as `crate::decode::observed_name` via this
+// re-export so the generated parser code (emitted by `build.rs` from the
+// templates in `build_support/ticks/templates/parser/`) still resolves it.
+pub(crate) use error::observed_name;
+
+#[cfg(test)]
+mod tests;
diff --git a/crates/thetadatadx/src/mdds/decode/tests.rs b/crates/thetadatadx/src/mdds/decode/tests.rs
new file mode 100644
index 00000000..b741e3c4
--- /dev/null
+++ b/crates/thetadatadx/src/mdds/decode/tests.rs
@@ -0,0 +1,1092 @@
+//! Tests for the row-cell decoders, column extractors, and v3 hand-written
+//! parsers. Eastern-time / DST primitive tests live with their canonical
+//! home in `tdbe::time`.
+ +use super::cell::{ + row_number, row_number_i64, row_price_f64, row_price_type, row_price_value, row_text, +}; +use super::v3::{ + parse_calendar_days_v3, parse_iso_date, parse_option_contracts_v3, parse_time_text, +}; +use super::v3::{ + CALENDAR_STATUS_EARLY_CLOSE, CALENDAR_STATUS_FULL_CLOSE, CALENDAR_STATUS_OPEN, + CALENDAR_STATUS_WEEKEND, +}; +use super::{ + extract_number_column, parse_eod_ticks, parse_greeks_all_ticks, parse_greeks_first_order_ticks, + parse_greeks_second_order_ticks, parse_greeks_third_order_ticks, parse_trade_ticks, + DecodeError, +}; +use crate::proto; + +/// Build a DataValue containing a Number. +fn dv_number(n: i64) -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::Number(n)), + } +} + +/// Build a DataValue containing a Price. +fn dv_price(value: i32, r#type: i32) -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::Price(proto::Price { + value, + r#type, + })), + } +} + +/// Build a DataValue containing NullValue. +fn dv_null() -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::NullValue(0)), + } +} + +/// Build a DataValue containing a Timestamp. +fn dv_timestamp(epoch_ms: u64) -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::Timestamp( + proto::ZonedDateTime { epoch_ms, zone: 0 }, + )), + } +} + +/// Build a DataValue with no data_type set (missing). +fn dv_missing() -> proto::DataValue { + proto::DataValue { data_type: None } +} + +/// Build a DataValue containing Text. 
+fn dv_text(s: &str) -> proto::DataValue {
+    proto::DataValue {
+        data_type: Some(proto::data_value::DataType::Text(s.to_string())),
+    }
+}
+
+fn row_of(values: Vec<proto::DataValue>) -> proto::DataValueList {
+    proto::DataValueList { values }
+}
+
+#[test]
+fn row_number_returns_value_for_number_cell() {
+    let row = row_of(vec![dv_number(42)]);
+    assert_eq!(row_number(&row, 0).unwrap(), Some(42));
+}
+
+#[test]
+fn row_number_returns_none_for_null_cell() {
+    let row = row_of(vec![dv_null()]);
+    assert_eq!(row_number(&row, 0).unwrap(), None);
+}
+
+#[test]
+fn row_number_errors_on_unset_cell() {
+    // A DataValue with the oneof unset is a wire-protocol anomaly.
+    // Java's `PojoMessageUtils.convert` hits the default arm for
+    // `DATATYPE_NOT_SET` and throws `IllegalArgumentException`; we
+    // surface it as `TypeMismatch { observed: "Unset" }`.
+    let row = row_of(vec![dv_missing()]);
+    assert_eq!(
+        row_number(&row, 0),
+        Err(DecodeError::TypeMismatch {
+            column: 0,
+            expected: "Number|Timestamp",
+            observed: "Unset",
+        })
+    );
+}
+
+#[test]
+fn row_number_errors_on_out_of_bounds() {
+    let row = row_of(vec![]);
+    assert_eq!(
+        row_number(&row, 5),
+        Err(DecodeError::MissingCell { column: 5 })
+    );
+}
+
+#[test]
+fn row_number_errors_on_text_cell() {
+    let row = row_of(vec![dv_text("oops")]);
+    assert_eq!(
+        row_number(&row, 0),
+        Err(DecodeError::TypeMismatch {
+            column: 0,
+            expected: "Number|Timestamp",
+            observed: "Text",
+        })
+    );
+}
+
+#[test]
+fn row_number_errors_on_price_cell() {
+    let row = row_of(vec![dv_price(12345, 10)]);
+    assert_eq!(
+        row_number(&row, 0),
+        Err(DecodeError::TypeMismatch {
+            column: 0,
+            expected: "Number|Timestamp",
+            observed: "Price",
+        })
+    );
+}
+
+#[test]
+fn row_number_accepts_timestamp_for_time_columns() {
+    // v3 MDDS sends `ms_of_day` as a Timestamp.
+ let epoch_ms: u64 = 1_775_050_200_000; // 2026-04-01 09:30 ET + let row = row_of(vec![dv_timestamp(epoch_ms)]); + assert_eq!(row_number(&row, 0).unwrap(), Some(34_200_000)); +} + +#[test] +fn row_text_errors_on_number_cell() { + let row = row_of(vec![dv_number(42)]); + assert_eq!( + row_text(&row, 0), + Err(DecodeError::TypeMismatch { + column: 0, + expected: "Text", + observed: "Number", + }) + ); +} + +#[test] +fn row_price_f64_accepts_number_cell() { + // Documented v3 MDDS behavior: f64 fields may arrive as plain Number. + let row = row_of(vec![dv_number(1_500_000)]); + assert_eq!(row_price_f64(&row, 0).unwrap(), Some(1_500_000.0)); +} + +#[test] +fn row_price_value_returns_value_for_price_cell() { + let row = row_of(vec![dv_price(12345, 10)]); + assert_eq!(row_price_value(&row, 0).unwrap(), Some(12345)); +} + +#[test] +fn row_price_value_returns_none_for_null_cell() { + let row = row_of(vec![dv_null()]); + assert_eq!(row_price_value(&row, 0).unwrap(), None); +} + +#[test] +fn row_price_type_returns_type_for_price_cell() { + let row = row_of(vec![dv_price(12345, 10)]); + assert_eq!(row_price_type(&row, 0).unwrap(), Some(10)); +} + +#[test] +fn row_price_type_returns_none_for_null_cell() { + let row = row_of(vec![dv_null()]); + assert_eq!(row_price_type(&row, 0).unwrap(), None); +} + +#[test] +fn null_cells_dont_corrupt_trade_ticks() { + // Build a minimal DataTable with one row that has a NullValue in a field. + // Note: "price" header triggers Price-typed extraction, so we use a Price cell. 
+ let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "sequence".into(), + "ext_condition1".into(), + "ext_condition2".into(), + "ext_condition3".into(), + "ext_condition4".into(), + "condition".into(), + "size".into(), + "exchange".into(), + "price".into(), + "condition_flags".into(), + "price_flags".into(), + "volume_type".into(), + "records_back".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34200000), // ms_of_day + dv_number(1), // sequence + dv_null(), // ext_condition1 = NullValue + dv_number(0), // ext_condition2 + dv_number(0), // ext_condition3 + dv_number(0), // ext_condition4 + dv_number(0), // condition + dv_number(100), // size + dv_number(4), // exchange + dv_price(15000, 10), // price (Price-typed because header is "price") + dv_number(0), // condition_flags + dv_number(0), // price_flags + dv_number(0), // volume_type + dv_number(0), // records_back + dv_number(20240301), // date + ])], + }; + + let ticks = parse_trade_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let tick = &ticks[0]; + assert_eq!(tick.ms_of_day, 34200000); + // NullValue should default to 0, not corrupt subsequent fields. + assert_eq!(tick.ext_condition1, 0); + assert_eq!(tick.size, 100); + assert!((tick.price - 15000.0).abs() < 1e-10); + assert_eq!(tick.date, 20240301); +} + +#[test] +fn extract_number_column_returns_none_for_null() { + let table = proto::DataTable { + headers: vec!["val".into()], + data_table: vec![ + row_of(vec![dv_number(10)]), + row_of(vec![dv_null()]), + row_of(vec![dv_number(30)]), + ], + }; + + let col = extract_number_column(&table, "val"); + assert_eq!(col, vec![Some(10), None, Some(30)]); +} + +#[test] +fn parse_eod_timestamp_aliases_decode_time_and_date_separately() { + // 2026-04-01 13:30:00 UTC = 2026-04-01 09:30:00 ET (EDT). 
+ let epoch_ms: u64 = 1_775_050_200_000; + let table = proto::DataTable { + headers: vec![ + "timestamp".into(), + "timestamp2".into(), + "open".into(), + "close".into(), + ], + data_table: vec![row_of(vec![ + dv_timestamp(epoch_ms), + dv_timestamp(epoch_ms), + dv_number(15000), + dv_number(15100), + ])], + }; + + let ticks = parse_eod_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + assert_eq!(ticks[0].ms_of_day, 34_200_000); + assert_eq!(ticks[0].ms_of_day2, 34_200_000); + assert_eq!(ticks[0].date, 20260401); + assert!((ticks[0].open - 15000.0).abs() < 1e-10); + assert!((ticks[0].close - 15100.0).abs() < 1e-10); +} + +#[test] +fn row_number_i64_decodes_price_cells() { + // MDDS sends large integer fields as Price cells, not Number cells. + // Price encoding: price_type centered at 10. + // type=10 → value as-is, type=13 → value * 10^3, type=7 → value / 10^3 + // Example: Price { value: 3842, type: 19 } = 3842 * 10^9 = 3_842_000_000_000 + let row = row_of(vec![dv_price(3842, 19)]); + assert_eq!( + row_number_i64(&row, 0).unwrap(), + Some(3_842_000_000_000_i64) + ); +} + +#[test] +fn row_number_i64_still_decodes_number_cells() { + let row = row_of(vec![dv_number(999_999_999)]); + assert_eq!(row_number_i64(&row, 0).unwrap(), Some(999_999_999)); +} + +#[test] +fn row_number_i64_returns_none_for_null() { + let row = row_of(vec![dv_null()]); + assert_eq!(row_number_i64(&row, 0).unwrap(), None); +} + +#[test] +fn row_number_i64_errors_on_text_cell() { + let row = row_of(vec![dv_text("oops")]); + assert_eq!( + row_number_i64(&row, 0), + Err(DecodeError::TypeMismatch { + column: 0, + expected: "Number|Price", + observed: "Text", + }) + ); +} + +/// Pin a Price cell past `2^53` to the i64-native result for `type=17`. 
+#[test] +fn row_number_i64_price_cell_returns_bit_exact_i64() { + let row = row_of(vec![dv_price(1_073_741_823, 17)]); + let got = row_number_i64(&row, 0).unwrap().expect("Some"); + assert_eq!(got, 10_737_418_230_000_000_i64); + assert!(got > (1_i64 << 53)); +} + +/// `value == 0` decodes to 0 regardless of the exponent. Mathematically +/// the product is zero; the decoder must not reject a zero cell, even +/// when `price_type` is at the clamp boundary. +#[test] +fn row_number_i64_price_zero_value_short_circuits() { + let row = row_of(vec![dv_price(0, 19)]); + assert_eq!(row_number_i64(&row, 0), Ok(Some(0))); +} + +/// `row_number_i64` and `row_price_f64` must agree on the same wire +/// cell. With `type=19` (in-range) and `value=42`, `row_price_f64` +/// routes through `Price::new` which keeps `price_type=19`, and +/// `row_number_i64` produces the i64-native scale. Both should match. +/// Manual: 42 * 10^(19-10) = 42 * 10^9 = 42_000_000_000. +#[test] +fn row_number_i64_matches_row_price_f64_at_type_19() { + let row = row_of(vec![dv_price(42, 19)]); + let as_int = row_number_i64(&row, 0).unwrap().expect("Some"); + let as_float = row_price_f64(&row, 0).unwrap().expect("Some"); + assert_eq!(as_int, 42_000_000_000_i64); + assert!((as_float - 42_000_000_000.0_f64).abs() < 1.0); +} + +/// `price_type=20` is out-of-range; both decoders must clamp to 19 +/// (matching `Price::new`). A `type=20` cell and a `type=19` cell with +/// the same value must therefore decode to the same i64. +#[test] +fn row_number_i64_clamps_price_type_above_19() { + let row_clamped = row_of(vec![dv_price(7, 20)]); + let row_in_range = row_of(vec![dv_price(7, 19)]); + assert_eq!( + row_number_i64(&row_clamped, 0).unwrap(), + row_number_i64(&row_in_range, 0).unwrap(), + ); + // Pin the absolute value too: 7 * 10^9 = 7_000_000_000. 
+ assert_eq!( + row_number_i64(&row_clamped, 0).unwrap(), + Some(7_000_000_000_i64) + ); +} + +/// Maximum scale-up under the clamped contract: `value=i32::MAX, +/// type=19` yields `i32::MAX * 10^9 = 2_147_483_647_000_000_000`, +/// which is below `i64::MAX = 9_223_372_036_854_775_807`. The product +/// must fit and decode bit-exact (no `TypeMismatch`). +#[test] +fn row_number_i64_max_in_range_price_fits_i64() { + let row = row_of(vec![dv_price(i32::MAX, 19)]); + assert_eq!( + row_number_i64(&row, 0).unwrap(), + Some(2_147_483_647_000_000_000_i64), + ); +} + +#[test] +fn parse_calendar_v3_holiday() { + // Simulate calendar_year response for a holiday (full_close). + let table = proto::DataTable { + headers: vec!["date".into(), "type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![ + dv_text("2025-01-01"), + dv_text("full_close"), + dv_null(), + dv_null(), + ])], + }; + + let days = parse_calendar_days_v3(&table).unwrap(); + assert_eq!(days.len(), 1); + let d = &days[0]; + assert_eq!(d.date, 20250101); + assert_eq!(d.is_open, 0); + assert_eq!(d.open_time, 0); + assert_eq!(d.close_time, 0); + assert_eq!(d.status, CALENDAR_STATUS_FULL_CLOSE); +} + +#[test] +fn parse_calendar_v3_open_day() { + // Simulate calendar_on_date response for a regular trading day. + // Note: on_date and open_today omit the "date" column. 
+ let table = proto::DataTable { + headers: vec!["type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![ + dv_text("open"), + dv_text("09:30:00"), + dv_text("16:00:00"), + ])], + }; + + let days = parse_calendar_days_v3(&table).unwrap(); + assert_eq!(days.len(), 1); + let d = &days[0]; + assert_eq!(d.date, 0); // no date column + assert_eq!(d.is_open, 1); + assert_eq!(d.open_time, 34_200_000); // 9:30 AM = 9*3600+30*60 = 34200 seconds = 34200000 ms + assert_eq!(d.close_time, 57_600_000); // 4:00 PM = 16*3600 = 57600 seconds = 57600000 ms + assert_eq!(d.status, CALENDAR_STATUS_OPEN); +} + +#[test] +fn parse_calendar_v3_early_close() { + // Simulate an early close day (day after Thanksgiving). + let table = proto::DataTable { + headers: vec!["date".into(), "type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![ + dv_text("2025-11-28"), + dv_text("early_close"), + dv_text("09:30:00"), + dv_text("13:00:00"), + ])], + }; + + let days = parse_calendar_days_v3(&table).unwrap(); + assert_eq!(days.len(), 1); + let d = &days[0]; + assert_eq!(d.date, 20251128); + assert_eq!(d.is_open, 1); + assert_eq!(d.open_time, 34_200_000); + assert_eq!(d.close_time, 46_800_000); // 1:00 PM = 13*3600 = 46800 seconds = 46800000 ms + assert_eq!(d.status, CALENDAR_STATUS_EARLY_CLOSE); +} + +#[test] +fn parse_calendar_v3_weekend() { + let table = proto::DataTable { + headers: vec!["type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![dv_text("weekend"), dv_null(), dv_null()])], + }; + + let days = parse_calendar_days_v3(&table).unwrap(); + assert_eq!(days.len(), 1); + let d = &days[0]; + assert_eq!(d.is_open, 0); + assert_eq!(d.status, CALENDAR_STATUS_WEEKEND); +} + +#[test] +fn parse_time_text_valid() { + assert_eq!(parse_time_text("09:30:00"), 34_200_000); + assert_eq!(parse_time_text("16:00:00"), 57_600_000); + assert_eq!(parse_time_text("13:00:00"), 46_800_000); + assert_eq!(parse_time_text("00:00:00"), 0); +} + +#[test] 
+fn parse_time_text_invalid_returns_zero() { + assert_eq!(parse_time_text("invalid"), 0); + assert_eq!(parse_time_text(""), 0); +} + +#[test] +fn parse_iso_date_yyyymmdd_passthrough_and_iso_split() { + assert_eq!(parse_iso_date("20260413"), 20260413); + assert_eq!(parse_iso_date("2026-04-13"), 20260413); + assert_eq!(parse_iso_date("not-a-date"), 0); +} + +#[test] +fn parse_trade_ticks_propagates_type_mismatch() { + // A Text cell in an i32 column is a schema violation — the parser + // must surface it, not silently coerce to 0. + let table = proto::DataTable { + headers: vec!["ms_of_day".into(), "price".into()], + data_table: vec![row_of(vec![dv_text("not-a-number"), dv_price(15000, 10)])], + }; + let err = parse_trade_ticks(&table).unwrap_err(); + assert!( + matches!(err, DecodeError::TypeMismatch { .. }), + "expected TypeMismatch, got {err:?}" + ); +} + +// ─────────── Unset-oneof is an error at every strict decode site ─────────── +// +// A `DataValue` with its `data_type` oneof unset is a wire-protocol +// anomaly (Java's `PojoMessageUtils.convert` default arm throws +// `IllegalArgumentException`). The helpers `row_number` / `row_date` / +// etc. already surface it as `TypeMismatch { observed: "Unset" }`. These +// tests pin the same behaviour on the call-sites that used to coalesce +// `NullValue | None` to zero: `parse_option_contracts_v3`, +// `parse_calendar_days_v3`, the generator-emitted EOD helpers, and the +// generator-emitted contract-id injected `expiration` / `right` fields. 
+ +#[test] +fn parse_option_contracts_v3_errors_on_unset_expiration() { + let table = proto::DataTable { + headers: vec!["root".into(), "expiration".into()], + data_table: vec![row_of(vec![dv_text("AAPL"), dv_missing()])], + }; + assert_eq!( + parse_option_contracts_v3(&table).unwrap_err(), + DecodeError::TypeMismatch { + column: 1, + expected: "Number|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_option_contracts_v3_errors_on_unset_right() { + let table = proto::DataTable { + headers: vec!["root".into(), "right".into()], + data_table: vec![row_of(vec![dv_text("AAPL"), dv_missing()])], + }; + assert_eq!( + parse_option_contracts_v3(&table).unwrap_err(), + DecodeError::TypeMismatch { + column: 1, + expected: "Number|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_calendar_days_v3_errors_on_unset_date() { + let table = proto::DataTable { + headers: vec!["date".into(), "type".into()], + data_table: vec![row_of(vec![dv_missing(), dv_text("open")])], + }; + assert_eq!( + parse_calendar_days_v3(&table).unwrap_err(), + DecodeError::TypeMismatch { + column: 0, + expected: "Number|Timestamp|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_calendar_days_v3_errors_on_unset_open_time() { + // `decode_calendar_time` is the helper covering both `open` and + // `close`; one test pins the shared path. + let table = proto::DataTable { + headers: vec!["type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![ + dv_text("open"), + dv_missing(), + dv_text("16:00:00"), + ])], + }; + assert_eq!( + parse_calendar_days_v3(&table).unwrap_err(), + DecodeError::TypeMismatch { + column: 1, + expected: "Text|Number", + observed: "Unset", + } + ); +} + +#[test] +fn parse_eod_ticks_errors_on_unset_cell() { + // `parse_eod_ticks` is generator-emitted with the `eod_num` / + // `eod_date` / `eod_price` helpers; one test pins the shared path. 
+ let table = proto::DataTable { + headers: vec!["timestamp".into(), "open".into()], + data_table: vec![row_of(vec![dv_missing(), dv_number(15000)])], + }; + let err = parse_eod_ticks(&table).unwrap_err(); + assert_eq!( + err, + DecodeError::TypeMismatch { + column: 0, + expected: "Number|Price|Timestamp", + observed: "Unset", + } + ); +} + +#[test] +fn parse_trade_ticks_errors_on_unset_injected_expiration() { + // `parse_trade_ticks` is generator-emitted with `contract_id = true`; + // an `expiration` header in the server payload triggers the injected + // `expiration` / `strike` / `right` decode. An unset cell there used + // to coalesce to 0; now it must fail loud. + let table = proto::DataTable { + headers: vec!["ms_of_day".into(), "price".into(), "expiration".into()], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15000, 10), + dv_missing(), + ])], + }; + let err = parse_trade_ticks(&table).unwrap_err(); + assert_eq!( + err, + DecodeError::TypeMismatch { + column: 2, + expected: "Number|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_trade_ticks_errors_on_unset_injected_right() { + let table = proto::DataTable { + headers: vec!["ms_of_day".into(), "price".into(), "right".into()], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15000, 10), + dv_missing(), + ])], + }; + let err = parse_trade_ticks(&table).unwrap_err(); + assert_eq!( + err, + DecodeError::TypeMismatch { + column: 2, + expected: "Number|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_greeks_all_ticks_decodes_price_encoded_greeks() { + // Regression: an earlier strict decode rejected Price cells for Greek + // columns, but the v3 MDDS server sends Greeks as Price-encoded + // values (mirroring Java's `dataValue2Object` -> BigDecimal path). + // Live run #24520486541 on main surfaced this as + // "column 13: expected Number, got Price" + // on `option_snapshot_greeks_first_order::bulk_chain` and peers. 
+ // Pin Price-cell decoding for both IV and a Greek so a future + // strict-Number tightening can't re-break it silently. + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_volatility".into(), + "delta".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + // IV = 0.1234 encoded with price_type = 6 (value * 10^-4). + dv_price(1234, 6), + // Delta = 0.5 encoded with price_type = 9 (value * 10^-1). + dv_price(5, 9), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + assert!((ticks[0].implied_volatility - 0.1234).abs() < 1e-10); + assert!((ticks[0].delta - 0.5).abs() < 1e-10); +} + +/// Pin the `implied_vol → implied_volatility` and `underlying_timestamp +/// → underlying_ms_of_day` aliases in `HEADER_ALIASES` by decoding a wire +/// payload whose headers use ONLY the v3 server-side names. If either +/// alias entry is dropped or mistyped, the matching schema field +/// silently zero-defaults via `opt_float` / `opt_number` (see the +/// generated `parse_greeks_all_ticks` body), and this test catches that +/// regression. +/// +/// The companion fixture-driven test +/// `crates/thetadatadx/tests/test_decode_captures.rs::greeks_all_*` +/// can't catch a broken `implied_vol` alias on its own because the +/// captured fixture's `first_row_implied_volatility` is `0.0` — a +/// missing alias and a real zero IV are indistinguishable there. +#[test] +fn parse_greeks_all_ticks_resolves_implied_vol_and_underlying_timestamp_aliases() { + // Headers use the v3 server-side names. Schema names + // (`implied_volatility`, `underlying_ms_of_day`) are deliberately + // absent so the parser MUST resolve them via `HEADER_ALIASES`. + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_vol".into(), + "underlying_timestamp".into(), + ], + // IV = 0.42 encoded with price_type = 6 (value * 10^-4). 
+ // underlying_timestamp epoch_ms 1_775_050_200_000 corresponds + // to 2026-04-01 09:30 ET, which `row_number` converts to + // ms-of-day 34_200_000 (matching `first_row_underlying_ms_of_day` + // in the option_history_greeks_all fixture meta). + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(4200, 6), + dv_timestamp(1_775_050_200_000), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + // Non-zero IV proves the `implied_vol` alias resolved; a broken + // alias would produce 0.0 from the `opt_float(None)` arm. + assert!( + (t.implied_volatility - 0.42).abs() < 1e-9, + "implied_vol alias did not resolve: got {}", + t.implied_volatility, + ); + // Non-zero ms-of-day proves the `underlying_timestamp` alias + // resolved; a broken alias would produce 0 from `opt_number(None)`. + assert_eq!(t.underlying_ms_of_day, 34_200_000); +} + +#[test] +fn parse_greeks_all_ticks_still_decodes_number_cells() { + // Companion to the Price-cell regression test: Number cells must + // still decode, matching Java's dispatch-on-wire-type semantics. + let table = proto::DataTable { + headers: vec!["ms_of_day".into(), "implied_volatility".into()], + data_table: vec![row_of(vec![dv_number(34_200_000), dv_number(0)])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + assert!(ticks[0].implied_volatility.abs() < 1e-10); +} + +/// Vendor wire shape for `option_*_greeks_first_order`: only the seven +/// first-order columns plus IV pair — vanna/charm/vomma/veta/speed/ +/// zomma/color/ultima/d1/d2/dual_delta/dual_gamma/vera are absent and +/// must default to `0.0` without surfacing any `find_header` warn. +/// Column layout pinned to `scripts/upstream_openapi.yaml` schema +/// `items_option_snapshot_greeks_first_order`. 
+#[test] +fn parse_greeks_all_ticks_decodes_first_order_subset_with_silent_gaps() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_volatility".into(), + "delta".into(), + "theta".into(), + "vega".into(), + "rho".into(), + "epsilon".into(), + "lambda".into(), + "iv_error".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(5023, 6), // delta = 0.5023 + dv_price(-114, 6), // theta = -0.0114 + dv_price(8741, 6), // vega = 0.8741 + dv_price(13598, 6), // rho = 1.3598 + dv_price(-1976, 6), // epsilon = -0.1976 + dv_price(32052, 6), // lambda = 3.2052 + dv_price(-3, 6), // iv_error = -3 / 10^4 = -0.0003 + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + // Wire-present columns: bit-exact against the input. + // `dv_price(value, 6)` decodes as `value * 10^(6-10) = value / 10000` + // (see `tdbe::types::price::Price::to_f64`). + assert_eq!(t.ms_of_day, 34_200_000); + assert!((t.implied_volatility - 0.2142).abs() < 1e-9); + assert!((t.delta - 0.5023).abs() < 1e-9); + assert!((t.theta - -0.0114).abs() < 1e-9); + assert!((t.vega - 0.8741).abs() < 1e-9); + assert!((t.rho - 1.3598).abs() < 1e-9); + assert!((t.epsilon - -0.1976).abs() < 1e-9); + assert!((t.lambda - 3.2052).abs() < 1e-9); + assert!((t.iv_error - -0.0003).abs() < 1e-9); + assert_eq!(t.date, 20_240_614); + + // Wire-absent columns: zero-defaulted. These are the columns the + // server does NOT publish for `_greeks_first_order` — `find_header` + // returning `None` for each must NOT yield an error and must NOT + // warn (the pre-fix behaviour spammed eight warn lines per row). 
+ assert_eq!(t.gamma, 0.0); + assert_eq!(t.vanna, 0.0); + assert_eq!(t.charm, 0.0); + assert_eq!(t.vomma, 0.0); + assert_eq!(t.veta, 0.0); + assert_eq!(t.speed, 0.0); + assert_eq!(t.zomma, 0.0); + assert_eq!(t.color, 0.0); + assert_eq!(t.ultima, 0.0); + assert_eq!(t.d1, 0.0); + assert_eq!(t.d2, 0.0); + assert_eq!(t.dual_delta, 0.0); + assert_eq!(t.dual_gamma, 0.0); + assert_eq!(t.vera, 0.0); +} + +/// Vendor wire shape for `option_*_greeks_second_order`: gamma / vanna +/// / charm / vomma / veta plus IV pair. Column layout pinned to +/// upstream OpenAPI schema `items_option_snapshot_greeks_second_order`. +#[test] +fn parse_greeks_all_ticks_decodes_second_order_subset_with_silent_gaps() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_volatility".into(), + "gamma".into(), + "vanna".into(), + "charm".into(), + "vomma".into(), + "veta".into(), + "iv_error".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(120, 6), // gamma = 0.012 + dv_price(45, 6), // vanna = 0.0045 + dv_price(-12, 6), // charm = -0.0012 + dv_price(900, 6), // vomma = 0.09 + dv_price(-3, 6), // veta = -0.0003 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert!((t.gamma - 0.012).abs() < 1e-9); + assert!((t.vanna - 0.0045).abs() < 1e-9); + assert!((t.charm - -0.0012).abs() < 1e-9); + assert!((t.vomma - 0.09).abs() < 1e-9); + assert!((t.veta - -0.0003).abs() < 1e-9); + + // First-order, third-order, and `_all`-only columns are absent + // on the wire and default to 0.0. + assert_eq!(t.delta, 0.0); + assert_eq!(t.speed, 0.0); + assert_eq!(t.zomma, 0.0); + assert_eq!(t.d1, 0.0); + assert_eq!(t.vera, 0.0); +} + +/// Vendor wire shape for `option_*_greeks_third_order`: speed / zomma / +/// color / ultima plus IV pair. 
This is the exact endpoint the Issue +/// #472 reporter was hitting — `option_snapshot_greeks_third_order` +/// previously emitted eight warn lines per row for the absent +/// first-order / second-order / `_all`-only columns. The test pins the +/// silent-gap behaviour so a future regression of `find_header` back +/// to `tracing::warn!` would surface here as a behavioural change. +/// Column layout pinned to upstream OpenAPI schema +/// `items_option_snapshot_greeks_third_order` (notably `vera` is NOT +/// in the third-order subset; it only ships in `_greeks_all`). +#[test] +fn parse_greeks_all_ticks_decodes_third_order_subset_with_silent_gaps() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_volatility".into(), + "speed".into(), + "zomma".into(), + "color".into(), + "ultima".into(), + "iv_error".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(7, 6), // speed = 0.0007 + dv_price(15, 6), // zomma = 0.0015 + dv_price(-2, 6), // color = -0.0002 + dv_price(33, 6), // ultima = 0.0033 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert!((t.speed - 0.0007).abs() < 1e-9); + assert!((t.zomma - 0.0015).abs() < 1e-9); + assert!((t.color - -0.0002).abs() < 1e-9); + assert!((t.ultima - 0.0033).abs() < 1e-9); + + // Vera is NOT a third-order column on the wire even though the + // generic `GreeksTick` struct carries the field. It must default + // to 0.0 here without warning. + assert_eq!(t.vera, 0.0); + // First-order and second-order columns also absent. 
+ assert_eq!(t.delta, 0.0); + assert_eq!(t.gamma, 0.0); + assert_eq!(t.vanna, 0.0); + assert_eq!(t.d1, 0.0); + assert_eq!(t.dual_gamma, 0.0); +} + +/// `parse_greeks_first_order_ticks` against the column subset the +/// vendor publishes for `option_*_greeks_first_order` -- pinned to +/// `items_option_snapshot_greeks_first_order` in the upstream OpenAPI. +/// Asserts every column the parser fills decodes to the exact value +/// from the input row, and that the underlying-snapshot pair is +/// populated (the column subset is what differs from `_greeks_all`, +/// not the underlying tail). +#[test] +fn parse_greeks_first_order_ticks_decodes_first_order_subset() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "bid".into(), + "ask".into(), + "delta".into(), + "theta".into(), + "vega".into(), + "rho".into(), + "epsilon".into(), + "lambda".into(), + "implied_volatility".into(), + "iv_error".into(), + "underlying_ms_of_day".into(), + "underlying_price".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15022, 6), // bid = 1.5022 + dv_price(15041, 6), // ask = 1.5041 + dv_price(5023, 6), // delta = 0.5023 + dv_price(-114, 6), // theta = -0.0114 + dv_price(8741, 6), // vega = 0.8741 + dv_price(13598, 6), // rho = 1.3598 + dv_price(-1976, 6), // epsilon = -0.1976 + dv_price(32052, 6), // lambda = 3.2052 + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(34_200_001), + dv_price(580025, 6), // underlying_price = 58.0025 + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_first_order_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert_eq!(t.ms_of_day, 34_200_000); + assert!((t.bid - 1.5022).abs() < 1e-9); + assert!((t.ask - 1.5041).abs() < 1e-9); + assert!((t.delta - 0.5023).abs() < 1e-9); + assert!((t.theta - -0.0114).abs() < 1e-9); + assert!((t.vega - 0.8741).abs() < 1e-9); + assert!((t.rho - 
1.3598).abs() < 1e-9); + assert!((t.epsilon - -0.1976).abs() < 1e-9); + assert!((t.lambda - 3.2052).abs() < 1e-9); + assert!((t.implied_volatility - 0.2142).abs() < 1e-9); + assert!((t.iv_error - -0.0003).abs() < 1e-9); + assert_eq!(t.underlying_ms_of_day, 34_200_001); + assert!((t.underlying_price - 58.0025).abs() < 1e-9); + assert_eq!(t.date, 20_240_614); +} + +/// `parse_greeks_second_order_ticks` against the column subset the +/// vendor publishes for `option_*_greeks_second_order` -- pinned to +/// `items_option_snapshot_greeks_second_order` in the upstream +/// OpenAPI. Second-order Greeks: gamma / vanna / charm / vomma / +/// veta plus the IV pair and the bid/ask quote pair. +#[test] +fn parse_greeks_second_order_ticks_decodes_second_order_subset() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "bid".into(), + "ask".into(), + "gamma".into(), + "vanna".into(), + "charm".into(), + "vomma".into(), + "veta".into(), + "implied_volatility".into(), + "iv_error".into(), + "underlying_ms_of_day".into(), + "underlying_price".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15022, 6), // bid = 1.5022 + dv_price(15041, 6), // ask = 1.5041 + dv_price(120, 6), // gamma = 0.012 + dv_price(45, 6), // vanna = 0.0045 + dv_price(-12, 6), // charm = -0.0012 + dv_price(900, 6), // vomma = 0.09 + dv_price(-3, 6), // veta = -0.0003 + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(34_200_001), + dv_price(580025, 6), + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_second_order_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert_eq!(t.ms_of_day, 34_200_000); + assert!((t.bid - 1.5022).abs() < 1e-9); + assert!((t.ask - 1.5041).abs() < 1e-9); + assert!((t.gamma - 0.012).abs() < 1e-9); + assert!((t.vanna - 0.0045).abs() < 1e-9); + assert!((t.charm - -0.0012).abs() < 1e-9); + assert!((t.vomma - 0.09).abs() 
< 1e-9); + assert!((t.veta - -0.0003).abs() < 1e-9); + assert!((t.implied_volatility - 0.2142).abs() < 1e-9); + assert!((t.iv_error - -0.0003).abs() < 1e-9); + assert_eq!(t.underlying_ms_of_day, 34_200_001); + assert!((t.underlying_price - 58.0025).abs() < 1e-9); + assert_eq!(t.date, 20_240_614); +} + +/// `parse_greeks_third_order_ticks` against the column subset the +/// vendor publishes for `option_*_greeks_third_order` -- pinned to +/// `items_option_snapshot_greeks_third_order` in the upstream +/// OpenAPI. Third-order Greeks: speed / zomma / color / ultima plus +/// the IV pair and the bid/ask quote pair. Notably the wire schema +/// does NOT publish `vera`; the struct does not carry it either. +#[test] +fn parse_greeks_third_order_ticks_decodes_third_order_subset() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "bid".into(), + "ask".into(), + "speed".into(), + "zomma".into(), + "color".into(), + "ultima".into(), + "implied_volatility".into(), + "iv_error".into(), + "underlying_ms_of_day".into(), + "underlying_price".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15022, 6), // bid = 1.5022 + dv_price(15041, 6), // ask = 1.5041 + dv_price(7, 6), // speed = 0.0007 + dv_price(15, 6), // zomma = 0.0015 + dv_price(-2, 6), // color = -0.0002 + dv_price(33, 6), // ultima = 0.0033 + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(34_200_001), + dv_price(580025, 6), + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_third_order_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert_eq!(t.ms_of_day, 34_200_000); + assert!((t.bid - 1.5022).abs() < 1e-9); + assert!((t.ask - 1.5041).abs() < 1e-9); + assert!((t.speed - 0.0007).abs() < 1e-9); + assert!((t.zomma - 0.0015).abs() < 1e-9); + assert!((t.color - -0.0002).abs() < 1e-9); + assert!((t.ultima - 0.0033).abs() < 1e-9); + 
assert!((t.implied_volatility - 0.2142).abs() < 1e-9);
+    assert!((t.iv_error - -0.0003).abs() < 1e-9);
+    assert_eq!(t.underlying_ms_of_day, 34_200_001);
+    assert!((t.underlying_price - 58.0025).abs() < 1e-9);
+    assert_eq!(t.date, 20_240_614);
+}
diff --git a/crates/thetadatadx/src/mdds/decode/transport.rs b/crates/thetadatadx/src/mdds/decode/transport.rs
new file mode 100644
index 00000000..5f5b9ffe
--- /dev/null
+++ b/crates/thetadatadx/src/mdds/decode/transport.rs
@@ -0,0 +1,91 @@
+//! `ResponseData` decompression and `DataTable` decode.
+//!
+//! Recycles a thread-local zstd decompressor and output buffer so repeated
+//! decompressions of similar-sized payloads avoid allocator pressure on the
+//! working buffer.
+
+use std::cell::RefCell;
+
+use crate::error::Error;
+use crate::proto;
+
+thread_local! {
+    /// Reusable zstd decompressor **and** output buffer — avoids allocating both
+    /// a fresh decompressor context and a fresh `Vec` on every call.
+    ///
+    /// The decompressor context (~128 KB of zstd internal state) is recycled, and
+    /// the output buffer retains its capacity across calls so that repeated
+    /// decompressions of similar-sized payloads hit no allocator at all.
+    ///
+    /// We use `decompress_to_buffer` which writes into the pre-existing Vec
+    /// without reallocating when capacity is sufficient. The final `.clone()`
+    /// is necessary since we return ownership, but the internal buffer capacity
+    /// persists across calls — the key win is avoiding repeated alloc/dealloc
+    /// cycles for the working buffer.
+    static ZSTD_STATE: RefCell<(zstd::bulk::Decompressor<'static>, Vec<u8>)> = RefCell::new((
+        // Infallible in practice: zstd decompressor creation only fails on OOM.
+        // thread_local! does not support Result, so unwrap is intentional here.
+        zstd::bulk::Decompressor::new().expect("zstd decompressor creation failed (possible OOM)"),
+        Vec::with_capacity(1024 * 1024), // 1 MB initial capacity
+    ));
+}
+
+/// Decompress a `ResponseData` payload.
Returns the raw protobuf bytes of the `DataTable`.
+///
+/// # Unknown compression algorithms
+///
+/// Prost's `.algo()` silently maps unknown enum values to the default (None=0),
+/// so we check the raw i32 to detect truly unknown algorithms. Without this,
+/// an unrecognized algorithm would be treated as uncompressed, producing garbage.
+///
+/// # Buffer recycling
+///
+/// Uses a thread-local `(Decompressor, Vec)` pair. The `Vec` retains its
+/// capacity across calls, so repeated decompressions of similar-sized payloads
+/// avoid hitting the allocator for the working buffer. The returned `Vec`
+/// is a clone (we must return ownership), but the internal slab persists.
+/// # Errors
+///
+/// Returns [`Error::Decompress`] if the compression algorithm is unknown or
+/// zstd decompression fails.
+// Reason: original_size is a protobuf u64 that fits in usize for valid payloads.
+#[allow(clippy::cast_possible_truncation)]
+pub fn decompress_response(response: &proto::ResponseData) -> Result<Vec<u8>, Error> {
+    let algo_raw = response
+        .compression_description
+        .as_ref()
+        .map_or(0, |cd| cd.algo);
+
+    match proto::CompressionAlgo::try_from(algo_raw) {
+        Ok(proto::CompressionAlgo::None) => Ok(response.compressed_data.clone()),
+        Ok(proto::CompressionAlgo::Zstd) => {
+            let original_size = usize::try_from(response.original_size).unwrap_or(0);
+            ZSTD_STATE.with(|cell| {
+                let (ref mut dec, ref mut buf) = *cell.borrow_mut();
+                buf.clear();
+                buf.resize(original_size, 0);
+                let n = dec
+                    .decompress_to_buffer(&response.compressed_data, buf)
+                    .map_err(|e| Error::Decompress(e.to_string()))?;
+                buf.truncate(n);
+                Ok(buf.clone())
+            })
+        }
+        _ => Err(Error::Decompress(format!(
+            "unknown compression algorithm: {algo_raw}"
+        ))),
+    }
+}
+
+/// Decode a `ResponseData` into a `DataTable`.
+///
+/// # Errors
+///
+/// Returns [`Error::Decompress`] if decompression fails or [`Error::Decode`]
+/// if protobuf deserialization fails.
+pub fn decode_data_table(response: &proto::ResponseData) -> Result<proto::DataTable, Error> {
+    let bytes = decompress_response(response)?;
+    let table: proto::DataTable =
+        prost::Message::decode(bytes.as_slice()).map_err(|e| Error::Decode(e.to_string()))?;
+    Ok(table)
+}
diff --git a/crates/thetadatadx/src/mdds/decode/v3.rs b/crates/thetadatadx/src/mdds/decode/v3.rs
new file mode 100644
index 00000000..7399905e
--- /dev/null
+++ b/crates/thetadatadx/src/mdds/decode/v3.rs
@@ -0,0 +1,339 @@
+//! Hand-written parsers for v3 MDDS payload shapes that the macro-generated
+//! parser cannot model directly.
+//!
+//! v3 publishes some columns as text (ISO dates, "PUT"/"CALL" rights, the
+//! calendar `type` column) where the schema would otherwise expect numeric
+//! cells. The hand-written parsers here dispatch on the cell's own wire
+//! type, surfacing mismatches as [`DecodeError::TypeMismatch`] rather than
+//! coalescing silently.
+
+use crate::proto;
+use tdbe::types::tick::{CalendarDay, OptionContract};
+
+use super::cell::{cell_type, row_price_f64, row_text};
+use super::error::{observed_name, DecodeError};
+use super::headers::find_header;
+
+/// Hand-written parser for `OptionContract` that handles the v3 server's
+/// text-formatted fields (expiration as ISO date, right as "PUT"/"CALL").
+///
+/// The `expiration` and `right` columns legitimately arrive as either `Number`
+/// or `Text` depending on the upstream version, so the parser dispatches on
+/// the cell's own type rather than coalescing silently. Mismatched types
+/// propagate as [`DecodeError::TypeMismatch`].
+///
+/// # Errors
+///
+/// Returns [`DecodeError`] on type mismatch or missing cell.
+pub fn parse_option_contracts_v3(
+    table: &crate::proto::DataTable,
+) -> Result<Vec<OptionContract>, DecodeError> {
+    let h: Vec<&str> = table
+        .headers
+        .iter()
+        .map(std::string::String::as_str)
+        .collect();
+
+    // Same schema-drift guard as the generated parsers: "no contracts today"
+    // is legitimate, but a rows-present response missing the required `root`
+    // column is a silent data-loss trap. The wire column is still named
+    // `root` (or `symbol` via the v3 alias in `decode::HEADER_ALIASES`); the
+    // `symbol` binding here is the public-API field name documented in the
+    // v3 vendor migration guide.
+    let symbol_idx = match find_header(&h, "root") {
+        Some(i) => i,
+        None => {
+            if table.data_table.is_empty() {
+                return Ok(vec![]);
+            }
+            return Err(DecodeError::MissingRequiredHeader {
+                header: "root",
+                rows: table.data_table.len(),
+                available: h.join(","),
+            });
+        }
+    };
+    let exp_idx = find_header(&h, "expiration");
+    let strike_idx = find_header(&h, "strike");
+    let right_idx = find_header(&h, "right");
+
+    table
+        .data_table
+        .iter()
+        .map(|row| {
+            let symbol = row_text(row, symbol_idx)?.unwrap_or_default();
+
+            // Expiration: `Number` carries YYYYMMDD directly; `Text` carries
+            // an ISO "2026-04-13" that we parse here. `NullValue` → 0 (legit
+            // null, coalesce). An unset oneof is a wire anomaly → TypeMismatch.
+            let expiration = match exp_idx {
+                Some(i) => match cell_type(row, i)?
{ + Some(proto::data_value::DataType::Number(n)) => *n as i32, + Some(proto::data_value::DataType::Text(s)) => parse_iso_date(s), + Some(proto::data_value::DataType::NullValue(_)) => 0, + None => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Text", + observed: "Unset", + }); + } + other => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Text", + observed: observed_name(other), + }); + } + }, + None => 0, + }; + + let strike = match strike_idx { + Some(i) => row_price_f64(row, i)?.unwrap_or(0.0), + None => 0.0, + }; + + // Right: `Number` carries the ASCII code directly; `Text` carries + // "PUT"/"CALL"/"P"/"C". `NullValue` / unknown text → 0. An unset + // oneof is a wire anomaly → TypeMismatch. + let right = match right_idx { + Some(i) => match cell_type(row, i)? { + Some(proto::data_value::DataType::Number(n)) => *n as i32, + Some(proto::data_value::DataType::Text(s)) => match s.as_str() { + "CALL" | "C" => 67, // ASCII 'C' + "PUT" | "P" => 80, // ASCII 'P' + _ => 0, + }, + Some(proto::data_value::DataType::NullValue(_)) => 0, + None => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Text", + observed: "Unset", + }); + } + other => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Text", + observed: observed_name(other), + }); + } + }, + None => 0, + }; + + Ok(OptionContract { + symbol, + expiration, + strike, + right, + }) + }) + .collect() +} + +/// Parse an ISO date string "2026-04-13" to YYYYMMDD integer 20260413. +// Reason: date parsing with known-safe integer ranges. 
+#[allow(clippy::cast_possible_truncation, clippy::missing_panics_doc)]
+pub(crate) fn parse_iso_date(s: &str) -> i32 {
+    // Fast path: already numeric (YYYYMMDD)
+    if let Ok(n) = s.parse::<i32>() {
+        return n;
+    }
+    // ISO format: YYYY-MM-DD
+    let parts: Vec<&str> = s.split('-').collect();
+    if parts.len() == 3 {
+        if let (Ok(y), Ok(m), Ok(d)) = (
+            parts[0].parse::<i32>(),
+            parts[1].parse::<i32>(),
+            parts[2].parse::<i32>(),
+        ) {
+            return y * 10_000 + m * 100 + d;
+        }
+    }
+    0
+}
+
+/// Parse a time string "HH:MM:SS" to milliseconds from midnight.
+pub(crate) fn parse_time_text(s: &str) -> i32 {
+    let parts: Vec<&str> = s.split(':').collect();
+    if parts.len() == 3 {
+        if let (Ok(h), Ok(m), Ok(sec)) = (
+            parts[0].parse::<i32>(),
+            parts[1].parse::<i32>(),
+            parts[2].parse::<i32>(),
+        ) {
+            return (h * 3_600 + m * 60 + sec) * 1_000;
+        }
+    }
+    0
+}
+
+/// Calendar day status constants.
+///
+/// The v3 MDDS server sends a `type` column with text values. We map them to
+/// integer constants for the `CalendarDay.status` field:
+///
+/// | Server text    | Constant | Meaning                           |
+/// |----------------|----------|-----------------------------------|
+/// | `"open"`       | `0`      | Normal trading day                |
+/// | `"early_close"`| `1`      | Early close (e.g. day after Thanksgiving) |
+/// | `"full_close"` | `2`      | Market closed (holiday)           |
+/// | `"weekend"`    | `3`      | Weekend                           |
+/// | (unknown)      | `-1`     | Unrecognized status text          |
+pub const CALENDAR_STATUS_OPEN: i32 = 0;
+pub const CALENDAR_STATUS_EARLY_CLOSE: i32 = 1;
+pub const CALENDAR_STATUS_FULL_CLOSE: i32 = 2;
+pub const CALENDAR_STATUS_WEEKEND: i32 = 3;
+pub const CALENDAR_STATUS_UNKNOWN: i32 = -1;
+
+/// Map a v3 calendar `type` text to `(is_open, status)`.
+fn calendar_type_text(s: &str) -> (i32, i32) {
+    match s {
+        "open" => (1, CALENDAR_STATUS_OPEN),
+        "early_close" => (1, CALENDAR_STATUS_EARLY_CLOSE),
+        "full_close" => (0, CALENDAR_STATUS_FULL_CLOSE),
+        "weekend" => (0, CALENDAR_STATUS_WEEKEND),
+        _ => (0, CALENDAR_STATUS_UNKNOWN),
+    }
+}
+
+/// Hand-written parser for `CalendarDay` that handles the v3 server's
+/// text-formatted fields.
+///
+/// The v3 MDDS server sends calendar data with different column names and types
+/// than the generated parser expects:
+///
+/// | Schema field | Server header | Server type | Mapping                                |
+/// |--------------|---------------|-------------|----------------------------------------|
+/// | `date`       | `date`        | Text        | "2025-01-01" -> 20250101               |
+/// | `is_open`    | `type`        | Text        | `"open"`/`"early_close"` -> 1, else -> 0 |
+/// | `open_time`  | `open`        | Text / Null | "09:30:00" -> 34200000 ms              |
+/// | `close_time` | `close`       | Text / Null | "16:00:00" -> 57600000 ms              |
+/// | `status`     | `type`        | Text        | See [`CALENDAR_STATUS_OPEN`] etc.      |
+///
+/// Note: `calendar_on_date` and `calendar_open_today` omit the `date` column.
+/// Each column dispatches on the cell's own type rather than coalescing
+/// silently — mismatched types propagate as [`DecodeError::TypeMismatch`].
+///
+/// # Errors
+///
+/// Returns [`DecodeError`] on type mismatch or missing cell.
+pub fn parse_calendar_days_v3(
+    table: &crate::proto::DataTable,
+) -> Result<Vec<CalendarDay>, DecodeError> {
+    let h: Vec<&str> = table
+        .headers
+        .iter()
+        .map(std::string::String::as_str)
+        .collect();
+
+    let date_idx = h.iter().position(|&s| s == "date");
+    let type_idx = h.iter().position(|&s| s == "type");
+    let open_idx = h.iter().position(|&s| s == "open");
+    let close_idx = h.iter().position(|&s| s == "close");
+
+    table
+        .data_table
+        .iter()
+        .map(|row| {
+            // date: Number carries YYYYMMDD, Timestamp converts to ET date,
+            // Text "2025-01-01" parses to YYYYMMDD. `NullValue` → 0 (legit
+            // null).
Unset oneof is a wire anomaly → TypeMismatch. + let date = match date_idx { + Some(i) => match cell_type(row, i)? { + Some(proto::data_value::DataType::Number(n)) => *n as i32, + Some(proto::data_value::DataType::Timestamp(ts)) => { + tdbe::time::timestamp_to_date(ts.epoch_ms) + } + Some(proto::data_value::DataType::Text(s)) => parse_iso_date(s), + Some(proto::data_value::DataType::NullValue(_)) => 0, + None => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Timestamp|Text", + observed: "Unset", + }); + } + other => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Timestamp|Text", + observed: observed_name(other), + }); + } + }, + None => 0, + }; + + // type: Text "open"/"full_close"/"early_close"/"weekend"; Number + // kept as a future-proofing path. `NullValue` → (0, 0). Unset + // oneof is a wire anomaly → TypeMismatch. + let (is_open, status) = match type_idx { + Some(i) => match cell_type(row, i)? { + Some(proto::data_value::DataType::Text(s)) => calendar_type_text(s), + Some(proto::data_value::DataType::Number(n)) => { + let n = *n as i32; + (i32::from(n != 0), n) + } + Some(proto::data_value::DataType::NullValue(_)) => (0, 0), + None => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Text|Number", + observed: "Unset", + }); + } + other => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Text|Number", + observed: observed_name(other), + }); + } + }, + None => (0, 0), + }; + + let open_time = decode_calendar_time(row, open_idx)?; + let close_time = decode_calendar_time(row, close_idx)?; + + Ok(CalendarDay { + date, + is_open, + open_time, + close_time, + status, + }) + }) + .collect() +} + +/// Decode a calendar `open`/`close` column. `Text "HH:MM:SS"` → ms-of-day; +/// `Number` kept as future-proofing. `NullValue` / absent column → 0. An unset +/// oneof is a wire anomaly → [`DecodeError::TypeMismatch`]. 
+fn decode_calendar_time(
+    row: &proto::DataValueList,
+    idx: Option<usize>,
+) -> Result<i32, DecodeError> {
+    let Some(i) = idx else {
+        return Ok(0);
+    };
+    match cell_type(row, i)? {
+        Some(proto::data_value::DataType::Text(s)) => Ok(parse_time_text(s)),
+        Some(proto::data_value::DataType::Number(n)) => Ok(*n as i32),
+        Some(proto::data_value::DataType::NullValue(_)) => Ok(0),
+        None => Err(DecodeError::TypeMismatch {
+            column: i,
+            expected: "Text|Number",
+            observed: "Unset",
+        }),
+        other => Err(DecodeError::TypeMismatch {
+            column: i,
+            expected: "Text|Number",
+            observed: observed_name(other),
+        }),
+    }
+}
diff --git a/crates/thetadatadx/src/mdds/mod.rs b/crates/thetadatadx/src/mdds/mod.rs
index b28cbae1..fd213fee 100644
--- a/crates/thetadatadx/src/mdds/mod.rs
+++ b/crates/thetadatadx/src/mdds/mod.rs
@@ -37,6 +37,7 @@
 //! live in the in-crate `wire_semantics` module.
 
 mod client;
+pub mod decode;
 mod endpoints;
 mod stream;
 mod validate;
diff --git a/crates/thetadatadx/src/observability.rs b/crates/thetadatadx/src/observability.rs
index 865d083f..19f343bb 100644
--- a/crates/thetadatadx/src/observability.rs
+++ b/crates/thetadatadx/src/observability.rs
@@ -33,7 +33,7 @@ use crate::config::DirectConfig;
 /// Install the Prometheus exporter using the port configured on
 /// `config`. Returns `Ok(())` when the feature is disabled or when
-/// `config.metrics_port` is `None` — callers don't need to guard at
+/// `config.metrics.port` is `None` — callers don't need to guard at
 /// every call site.
 ///
 /// # Errors
 ///
@@ -42,7 +42,7 @@ use crate::config::DirectConfig;
 /// to the configured port. Re-installation in the same process logs a
 /// warning and returns `Ok(())`.
pub fn try_install_exporter(config: &DirectConfig) -> Result<(), crate::error::Error> { - let Some(port) = config.metrics_port else { + let Some(port) = config.metrics.port else { return Ok(()); }; install_exporter_impl(port) @@ -96,13 +96,13 @@ mod tests { #[test] fn try_install_noop_when_metrics_port_none() { let config = DirectConfig::production_defaults(); - assert!(config.metrics_port.is_none()); + assert!(config.metrics.port.is_none()); try_install_exporter(&config).expect("must be a no-op"); } #[test] fn with_metrics_port_builder_sets_port() { let config = DirectConfig::production_defaults().with_metrics_port(9090); - assert_eq!(config.metrics_port, Some(9090)); + assert_eq!(config.metrics.port, Some(9090)); } } diff --git a/crates/thetadatadx/src/unified.rs b/crates/thetadatadx/src/unified.rs index 6135885d..b1a5d1e3 100644 --- a/crates/thetadatadx/src/unified.rs +++ b/crates/thetadatadx/src/unified.rs @@ -177,11 +177,11 @@ impl ThetaDataDx { let config = self.historical.config(); let client = FpssClient::connect( &self.creds, - &config.fpss_hosts, - config.fpss_ring_size, - config.fpss_flush_mode, - config.reconnect_policy.clone(), - config.derive_ohlcvc, + &config.fpss.hosts, + config.fpss.ring_size, + config.fpss.flush_mode, + config.reconnect.policy.clone(), + config.fpss.derive_ohlcvc, move |event: &FpssEvent| { // Reader-thread side: clone the event and push onto the // bounded queue. 
On overflow the dispatcher drops the @@ -252,11 +252,11 @@ impl ThetaDataDx { let config = self.historical.config(); let client = FpssClient::connect( &self.creds, - &config.fpss_hosts, - config.fpss_ring_size, - config.fpss_flush_mode, - config.reconnect_policy.clone(), - config.derive_ohlcvc, + &config.fpss.hosts, + config.fpss.ring_size, + config.fpss.flush_mode, + config.reconnect.policy.clone(), + config.fpss.derive_ohlcvc, handler, )?; *guard = Some(client); diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index c3f44210..19721055 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -5,6 +5,40 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [8.0.36] - 2026-05-07 + +### Changed + +- **`crates/thetadatadx/src/decode.rs` (2177 LoC) split into 7 modules** + under `mdds/decode/{error,headers,transport,extract,cell,v3}`. Pure + structural refactor; public API unchanged via `mdds::decode::*` re-exports. +- **Eastern-time + DST primitives lifted to `tdbe::time`.** + `eastern_offset_ms`, `march_second_sunday_utc`, `november_first_sunday_utc`, + `april_first_sunday_utc`, `october_last_sunday_utc`, `civil_to_epoch_days`, + `timestamp_to_ms_of_day`, `timestamp_to_date` — single canonical module + reused by mdds, fpss, flatfiles. tdbe 0.12.9 → 0.12.10. +- **`crates/thetadatadx/src/fpss/protocol.rs` (1613 LoC) split into 4 modules** + under `fpss/protocol/`. `mod.rs` keeps constants and re-exports; + `contract.rs` holds `Contract` + 6 constructors + `Display` + `FromStr` + + OCC-21 parser; `wire.rs` holds payload builders / parsers; `subscription.rs` + holds `SubscriptionKind`. 
+- **`crates/thetadatadx/src/config.rs` (1396 LoC, 30 flat fields) refactored + into 7 nested typed sub-configs.** `DirectConfig` now contains `mdds`, + `fpss`, `reconnect`, `retry`, `auth`, `metrics`, `runtime`. Field-read + accessors preserved on `DirectConfig` for back-compat (`config.mdds_host()` + etc still work). Field-write callers must migrate to nested form + (`config.fpss.queue_depth = ...`). Adds `mdds.connect_timeout_secs` + (default 10s, covers prior LOW finding). +- **`crates/tdbe/src/conditions.rs` (2749 LoC) refactored to TOML-driven + codegen.** Source-of-truth at `crates/tdbe/data/{trade,quote}_conditions.toml` + (149 + 75 entries). `crates/tdbe/build.rs` reads the TOMLs and emits + `crates/tdbe/src/conditions/tables_generated.rs` with compile-time + const arrays. Public surface unchanged; new `condition_tables_pin` + test pins 12 known entries against the const arrays for round-trip + protection. + + Refs #500. + ## [8.0.35] - 2026-05-07 ### Documentation diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index d55134e6..ef6ffcdc 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-ffi" -version = "8.0.35" +version = "8.0.36" edition.workspace = true rust-version.workspace = true authors.workspace = true @@ -31,7 +31,7 @@ testing-panic-boundary = [] [dependencies] thetadatadx = { path = "../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../crates/tdbe" } tokio = { version = "1.52.1", features = ["rt-multi-thread"] } # Used by the FPSS streaming callback silent-drop observability path # (see `tdx_fpss_dropped_events` / `tdx_unified_dropped_events`). 
Keep diff --git a/ffi/src/auth.rs b/ffi/src/auth.rs index fa22411e..0f77ea74 100644 --- a/ffi/src/auth.rs +++ b/ffi/src/auth.rs @@ -138,7 +138,7 @@ pub unsafe extern "C" fn tdx_config_set_flush_mode(config: *mut TdxConfig, mode: return; } let config = unsafe { &mut *config }; - config.inner.fpss_flush_mode = match mode { + config.inner.fpss.flush_mode = match mode { 1 => thetadatadx::FpssFlushMode::Immediate, _ => thetadatadx::FpssFlushMode::Batched, }; @@ -156,7 +156,7 @@ pub unsafe extern "C" fn tdx_config_set_reconnect_policy(config: *mut TdxConfig, return; } let config = unsafe { &mut *config }; - config.inner.reconnect_policy = match policy { + config.inner.reconnect.policy = match policy { 1 => thetadatadx::ReconnectPolicy::Manual, _ => thetadatadx::ReconnectPolicy::Auto, }; @@ -174,7 +174,7 @@ pub unsafe extern "C" fn tdx_config_set_derive_ohlcvc(config: *mut TdxConfig, en return; } let config = unsafe { &mut *config }; - config.inner.derive_ohlcvc = enabled != 0; + config.inner.fpss.derive_ohlcvc = enabled != 0; }) } diff --git a/ffi/src/streaming.rs b/ffi/src/streaming.rs index 5996fb93..344a499e 100644 --- a/ffi/src/streaming.rs +++ b/ffi/src/streaming.rs @@ -1464,11 +1464,11 @@ pub unsafe extern "C" fn tdx_fpss_connect( inner: Arc::new(Mutex::new(None)), connect_params: FpssConnectParams { creds: creds.inner.clone(), - hosts: config.inner.fpss_hosts.clone(), - ring_size: config.inner.fpss_ring_size, - flush_mode: config.inner.fpss_flush_mode, - reconnect_policy: config.inner.reconnect_policy.clone(), - derive_ohlcvc: config.inner.derive_ohlcvc, + hosts: config.inner.fpss.hosts.clone(), + ring_size: config.inner.fpss.ring_size, + flush_mode: config.inner.fpss.flush_mode, + reconnect_policy: config.inner.reconnect.policy.clone(), + derive_ohlcvc: config.inner.fpss.derive_ohlcvc, }, dispatcher: Mutex::new(None), callback: Mutex::new(None), diff --git a/sdks/python/Cargo.lock b/sdks/python/Cargo.lock index 28fe2e2b..ab366f4d 100644 --- 
a/sdks/python/Cargo.lock +++ b/sdks/python/Cargo.lock @@ -2474,10 +2474,12 @@ checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ + "serde", "sonic-rs", "thiserror 2.0.18", + "toml", ] [[package]] @@ -2495,7 +2497,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.35" +version = "8.0.36" dependencies = [ "crossbeam-channel", "disruptor", @@ -2530,7 +2532,7 @@ dependencies = [ [[package]] name = "thetadatadx-py" -version = "8.0.35" +version = "8.0.36" dependencies = [ "arrow", "arrow-array", diff --git a/sdks/python/Cargo.toml b/sdks/python/Cargo.toml index 716333f0..1f8d36e3 100644 --- a/sdks/python/Cargo.toml +++ b/sdks/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-py" -version = "8.0.35" +version = "8.0.36" edition = "2021" description = "Python bindings for thetadatadx — native ThetaData SDK powered by Rust" license = "Apache-2.0" @@ -19,7 +19,7 @@ doc = false [dependencies] # The Rust SDK we're wrapping thetadatadx = { path = "../../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } # Direct prost dep for decoding `thetadatadx::proto::ResponseData` bytes in # the `decode_response_bytes` hook. 
The main crate no longer re-exports diff --git a/sdks/python/src/lib.rs b/sdks/python/src/lib.rs index 13e7fe2a..a36bd46c 100644 --- a/sdks/python/src/lib.rs +++ b/sdks/python/src/lib.rs @@ -166,7 +166,8 @@ impl Credentials { // `frozen` + `skip_from_py_object` matches every generated pyclass: the // outer handle is immutable from Rust's perspective (no `&mut self` across // the GIL), while the inner `DirectConfig` is guarded by a `Mutex` so -// Python-side setters (`config.reconnect_policy = "auto"`) still mutate in +// Python-side setters (`config.reconnect_policy = "auto"`) still mutate the +// underlying nested `DirectConfig` in // place. Python-side semantics are unchanged. #[pyclass(module = "thetadatadx", frozen, skip_from_py_object)] @@ -218,7 +219,7 @@ impl Config { } }; let mut guard = self.inner.lock().unwrap_or_else(|e| e.into_inner()); - guard.reconnect_policy = parsed; + guard.reconnect.policy = parsed; Ok(()) } @@ -226,7 +227,7 @@ impl Config { #[getter] fn get_reconnect_policy(&self) -> &'static str { let guard = self.inner.lock().unwrap_or_else(|e| e.into_inner()); - match guard.reconnect_policy { + match guard.reconnect.policy { config::ReconnectPolicy::Auto => "auto", config::ReconnectPolicy::Manual => "manual", config::ReconnectPolicy::Custom(_) => "custom", @@ -240,23 +241,23 @@ impl Config { #[setter] fn set_derive_ohlcvc(&self, enabled: bool) { let mut guard = self.inner.lock().unwrap_or_else(|e| e.into_inner()); - guard.derive_ohlcvc = enabled; + guard.fpss.derive_ohlcvc = enabled; } /// Get the current OHLCVC derivation setting. 
#[getter] fn get_derive_ohlcvc(&self) -> bool { let guard = self.inner.lock().unwrap_or_else(|e| e.into_inner()); - guard.derive_ohlcvc + guard.fpss.derive_ohlcvc } fn __repr__(&self) -> String { let guard = self.inner.lock().unwrap_or_else(|e| e.into_inner()); format!( "Config(mdds={}:{}, fpss_hosts={})", - guard.mdds_host, - guard.mdds_port, - guard.fpss_hosts.len() + guard.mdds.host, + guard.mdds.port, + guard.fpss.hosts.len() ) } } diff --git a/sdks/typescript/Cargo.lock b/sdks/typescript/Cargo.lock index 3000545a..eed02a71 100644 --- a/sdks/typescript/Cargo.lock +++ b/sdks/typescript/Cargo.lock @@ -2117,10 +2117,12 @@ dependencies = [ [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ + "serde", "sonic-rs", "thiserror 2.0.18", + "toml", ] [[package]] @@ -2138,7 +2140,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.35" +version = "8.0.36" dependencies = [ "crossbeam-channel", "disruptor", @@ -2173,7 +2175,7 @@ dependencies = [ [[package]] name = "thetadatadx-napi" -version = "8.0.35" +version = "8.0.36" dependencies = [ "chrono", "napi", diff --git a/sdks/typescript/Cargo.toml b/sdks/typescript/Cargo.toml index 2e145c5d..f509aae4 100644 --- a/sdks/typescript/Cargo.toml +++ b/sdks/typescript/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-napi" -version = "8.0.35" +version = "8.0.36" edition = "2021" description = "TypeScript/Node.js bindings for thetadatadx — native ThetaData SDK powered by Rust" license = "Apache-2.0" @@ -13,7 +13,7 @@ crate-type = ["cdylib"] [dependencies] thetadatadx = { path = "../../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } napi = { version = "3.8.5", features = ["async", "tokio_rt", "serde-json", "napi6", "chrono_date"] } napi-derive = "3.5.4" diff --git a/sdks/typescript/npm/darwin-arm64/package.json b/sdks/typescript/npm/darwin-arm64/package.json index 8cc7752c..202bee5a 
100644 --- a/sdks/typescript/npm/darwin-arm64/package.json +++ b/sdks/typescript/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "thetadatadx-darwin-arm64", - "version": "8.0.35", + "version": "8.0.36", "os": [ "darwin" ], diff --git a/sdks/typescript/npm/linux-x64-gnu/package.json b/sdks/typescript/npm/linux-x64-gnu/package.json index 332e9401..d60c8584 100644 --- a/sdks/typescript/npm/linux-x64-gnu/package.json +++ b/sdks/typescript/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "thetadatadx-linux-x64-gnu", - "version": "8.0.35", + "version": "8.0.36", "os": [ "linux" ], diff --git a/sdks/typescript/npm/win32-x64-msvc/package.json b/sdks/typescript/npm/win32-x64-msvc/package.json index 9a16e6ec..9544e5ad 100644 --- a/sdks/typescript/npm/win32-x64-msvc/package.json +++ b/sdks/typescript/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "thetadatadx-win32-x64-msvc", - "version": "8.0.35", + "version": "8.0.36", "os": [ "win32" ], diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json index c328f71f..785555bf 100644 --- a/sdks/typescript/package.json +++ b/sdks/typescript/package.json @@ -1,6 +1,6 @@ { "name": "thetadatadx", - "version": "8.0.35", + "version": "8.0.36", "description": "Native ThetaData SDK for Node.js — powered by Rust via napi-rs", "license": "Apache-2.0", "repository": { @@ -30,9 +30,9 @@ "@napi-rs/cli": "^3.6.2" }, "optionalDependencies": { - "thetadatadx-linux-x64-gnu": "8.0.35", - "thetadatadx-darwin-arm64": "8.0.35", - "thetadatadx-win32-x64-msvc": "8.0.35" + "thetadatadx-linux-x64-gnu": "8.0.36", + "thetadatadx-darwin-arm64": "8.0.36", + "thetadatadx-win32-x64-msvc": "8.0.36" }, "engines": { "node": ">= 20" diff --git a/tools/cli/Cargo.toml b/tools/cli/Cargo.toml index 909c2832..dfaf2ce1 100644 --- a/tools/cli/Cargo.toml +++ b/tools/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-cli" -version = "8.0.35" +version = "8.0.36" edition.workspace = true rust-version.workspace = true 
authors.workspace = true @@ -21,7 +21,7 @@ path = "src/main.rs" [dependencies] thetadatadx = { path = "../../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } clap = { version = "4.6.1", features = ["derive"] } tokio = { version = "1.52.1", features = ["rt-multi-thread", "macros"] } sonic-rs = "0.5.8" diff --git a/tools/mcp/Cargo.lock b/tools/mcp/Cargo.lock index 982e811f..c1fb8ad8 100644 --- a/tools/mcp/Cargo.lock +++ b/tools/mcp/Cargo.lock @@ -1938,10 +1938,12 @@ dependencies = [ [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ + "serde", "sonic-rs", "thiserror 2.0.18", + "toml", ] [[package]] @@ -1959,7 +1961,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.35" +version = "8.0.36" dependencies = [ "crossbeam-channel", "disruptor", @@ -1994,7 +1996,7 @@ dependencies = [ [[package]] name = "thetadatadx-mcp" -version = "8.0.35" +version = "8.0.36" dependencies = [ "serde", "sonic-rs", diff --git a/tools/mcp/Cargo.toml b/tools/mcp/Cargo.toml index 4e78cf35..acfc4262 100644 --- a/tools/mcp/Cargo.toml +++ b/tools/mcp/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-mcp" -version = "8.0.35" +version = "8.0.36" edition = "2021" description = "MCP server for ThetaDataDx — gives LLMs instant access to ThetaData market data" license = "Apache-2.0" @@ -12,7 +12,7 @@ path = "src/main.rs" [dependencies] thetadatadx = { path = "../../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } tokio = { version = "1.52.1", features = ["rt-multi-thread", "macros", "io-util", "io-std"] } serde = { version = "1.0.228", features = ["derive"] } sonic-rs = "0.5.8" diff --git a/tools/server/Cargo.lock b/tools/server/Cargo.lock index ed9fbd9e..064455c1 100644 --- a/tools/server/Cargo.lock +++ b/tools/server/Cargo.lock @@ -2331,10 +2331,12 @@ 
dependencies = [ [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ + "serde", "sonic-rs", "thiserror 2.0.18", + "toml", ] [[package]] @@ -2352,7 +2354,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.35" +version = "8.0.36" dependencies = [ "crossbeam-channel", "disruptor", @@ -2387,7 +2389,7 @@ dependencies = [ [[package]] name = "thetadatadx-server" -version = "8.0.35" +version = "8.0.36" dependencies = [ "axum", "clap", diff --git a/tools/server/Cargo.toml b/tools/server/Cargo.toml index 7278e97b..7501aeb3 100644 --- a/tools/server/Cargo.toml +++ b/tools/server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-server" -version = "8.0.35" +version = "8.0.36" edition = "2021" rust-version = "1.85" authors = ["userFRM"] @@ -21,7 +21,7 @@ path = "src/main.rs" [dependencies] thetadatadx = { path = "../../crates/thetadatadx", features = ["config-file"] } rustls = { version = "0.23.38", features = ["ring"] } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } axum = { version = "0.8.9", features = ["ws"] } tokio = { version = "1.52.1", features = ["full"] } sonic-rs = "0.5.8"