diff --git a/CHANGELOG.md b/CHANGELOG.md index 29837eda..6815d4d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [8.0.34] - 2026-05-07 + +### Changed + +- **`crates/thetadatadx/src/decode.rs` (2177 LoC) split into + `mdds/decode/{error,headers,transport,extract,cell,v3}` modules.** + Pure structural refactor; public API unchanged. Re-exports + preserved at `thetadatadx::mdds::decode::*`. + +- **Eastern-time + DST primitives lifted to `tdbe::time`.** + `eastern_offset_ms`, `march_second_sunday_utc`, + `november_first_sunday_utc`, `april_first_sunday_utc`, + `october_last_sunday_utc`, `civil_to_epoch_days`, + `timestamp_to_ms_of_day`, `timestamp_to_date` — single canonical + module reused by mdds, fpss, flatfiles. Patch bump tdbe 0.12.9 + → 0.12.10. + + Refs #500. 
+ ## [8.0.33] - 2026-05-07 ### Added diff --git a/Cargo.lock b/Cargo.lock index b2c3fe1c..fd56e940 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3075,7 +3075,7 @@ dependencies = [ [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ "criterion", "sonic-rs", @@ -3097,7 +3097,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.33" +version = "8.0.34" dependencies = [ "arrow-array", "arrow-schema", @@ -3137,7 +3137,7 @@ dependencies = [ [[package]] name = "thetadatadx-cli" -version = "8.0.33" +version = "8.0.34" dependencies = [ "clap", "comfy-table", @@ -3149,7 +3149,7 @@ dependencies = [ [[package]] name = "thetadatadx-ffi" -version = "8.0.33" +version = "8.0.34" dependencies = [ "tdbe", "thetadatadx", diff --git a/crates/tdbe/Cargo.toml b/crates/tdbe/Cargo.toml index d56e73b3..ae705e25 100644 --- a/crates/tdbe/Cargo.toml +++ b/crates/tdbe/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tdbe" -version = "0.12.9" +version = "0.12.10" edition.workspace = true rust-version.workspace = true authors.workspace = true diff --git a/crates/tdbe/src/latency.rs b/crates/tdbe/src/latency.rs index 97b66f29..f23f33bb 100644 --- a/crates/tdbe/src/latency.rs +++ b/crates/tdbe/src/latency.rs @@ -4,8 +4,10 @@ //! (YYYYMMDD) into epoch nanoseconds, then subtracts from the local //! `received_at_ns` wall-clock timestamp captured at frame decode time. //! -//! No external timezone crate -- uses the same civil-date math and US DST -//! rules (Energy Policy Act 2005) as `thetadatadx::decode`. +//! Civil-date / DST primitives live in [`crate::time`]; this module is a +//! thin wrapper that adds the YYYYMMDD-and-`ms_of_day` decomposition. + +use crate::time::{civil_to_epoch_days, eastern_offset_ms}; /// Compute wire-to-application latency in nanoseconds. 
/// @@ -52,88 +54,6 @@ fn exchange_epoch_ns(ms_of_day: i32, date_yyyymmdd: i32) -> i64 { exchange_epoch_ms * 1_000_000 } -// --------------------------------------------------------------------------- -// Civil-date / DST helpers (same algorithm as thetadatadx::decode) -// --------------------------------------------------------------------------- - -/// Convert civil date to days since 1970-01-01 (Euclidean algorithm). -// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions -// that are safe for all valid calendar dates (year 0..9999). -#[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)] -fn civil_to_epoch_days(year: i32, month: u32, day: u32) -> i64 { - let y = if month <= 2 { - i64::from(year) - 1 - } else { - i64::from(year) - }; - let m = if month <= 2 { - i64::from(month) + 9 - } else { - i64::from(month) - 3 - }; - let era = if y >= 0 { y } else { y - 399 } / 400; - let yoe = (y - era * 400) as u64; - let doy = (153 * m as u64 + 2) / 5 + u64::from(day) - 1; - let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; - era * 146_097 + doe as i64 - 719_468 -} - -/// Eastern Time UTC offset in milliseconds for a given `epoch_ms`. -/// -/// US DST rule (Energy Policy Act of 2005): -/// - EDT (UTC-4): second Sunday of March 2:00 AM local -> first Sunday of November 2:00 AM local -/// - EST (UTC-5): rest of the year -// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions -// that are safe for all valid epoch timestamps in the market data date range. -#[allow( - clippy::cast_possible_wrap, - clippy::cast_sign_loss, - clippy::cast_possible_truncation -)] -fn eastern_offset_ms(epoch_ms: u64) -> i64 { - let epoch_secs = epoch_ms as i64 / 1_000; - let days_since_epoch = epoch_secs / 86_400; - - // Civil date from days since 1970-01-01. 
- let z = days_since_epoch + 719_468; - let era = if z >= 0 { z } else { z - 146_096 } / 146_097; - let doe = (z - era * 146_097) as u32; - let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365; - let year = yoe as i32 + (era * 400) as i32; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let month = if mp < 10 { mp + 3 } else { mp - 9 }; - let year = if month <= 2 { year + 1 } else { year }; - - let dst_start_utc = march_second_sunday_utc(year); - let dst_end_utc = november_first_sunday_utc(year); - - let epoch_ms_i64 = epoch_ms as i64; - if epoch_ms_i64 >= dst_start_utc && epoch_ms_i64 < dst_end_utc { - -4 * 3_600 * 1_000 // EDT - } else { - -5 * 3_600 * 1_000 // EST - } -} - -/// Epoch ms of the second Sunday of March at 7:00 AM UTC (= 2:00 AM EST). -fn march_second_sunday_utc(year: i32) -> i64 { - let mar1 = civil_to_epoch_days(year, 3, 1); - let dow = ((mar1 + 3) % 7 + 7) % 7; // 0=Mon..6=Sun - let days_to_first_sunday = (6 - dow + 7) % 7; - let second_sunday = mar1 + days_to_first_sunday + 7; - second_sunday * 86_400_000 + 7 * 3_600 * 1_000 -} - -/// Epoch ms of the first Sunday of November at 6:00 AM UTC (= 2:00 AM EDT). 
-fn november_first_sunday_utc(year: i32) -> i64 { - let nov1 = civil_to_epoch_days(year, 11, 1); - let dow = ((nov1 + 3) % 7 + 7) % 7; - let days_to_first_sunday = (6 - dow + 7) % 7; - let first_sunday = nov1 + days_to_first_sunday; - first_sunday * 86_400_000 + 6 * 3_600 * 1_000 -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/tdbe/src/lib.rs b/crates/tdbe/src/lib.rs index 8abcf4eb..41931242 100644 --- a/crates/tdbe/src/lib.rs +++ b/crates/tdbe/src/lib.rs @@ -23,6 +23,7 @@ pub mod json_canon; pub mod latency; pub mod right; pub mod sequences; +pub mod time; pub mod types; // Convenience re-exports at crate root diff --git a/crates/tdbe/src/time.rs b/crates/tdbe/src/time.rs new file mode 100644 index 00000000..a67c3324 --- /dev/null +++ b/crates/tdbe/src/time.rs @@ -0,0 +1,245 @@ +//! Eastern Time + DST primitives. +//! +//! Canonical Eastern-time conversion module reused by `thetadatadx` (mdds +//! decode + flatfiles) and the `tdbe` latency path. No external timezone +//! crate dependencies — pure civil-date arithmetic with the documented US +//! DST rules. +//! +//! ## DST rules +//! +//! **2007-onward** (Energy Policy Act of 2005): +//! - EDT (UTC-4): second Sunday of March at 2:00 AM local -> first Sunday +//! of November at 2:00 AM local +//! - EST (UTC-5): rest of the year +//! +//! **Before 2007** (1986 amendment to the Uniform Time Act; exact for 1987-2006 only): +//! - EDT (UTC-4): first Sunday of April at 2:00 AM local -> last Sunday of +//! October at 2:00 AM local +//! - EST (UTC-5): rest of the year +//! +//! Transition points are computed in UTC and compared, so callers do not +//! need to round-trip through a timezone library. + +/// Eastern Time UTC offset in milliseconds for a given `epoch_ms`. +/// +/// Returns `-4 * 3_600_000` (EDT) when DST is in effect for the civil +/// year of `epoch_ms`; otherwise `-5 * 3_600_000` (EST). DST window +/// selection follows the rules documented at the module level. 
+// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions for valid epoch timestamps. +#[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::cast_possible_truncation +)] +#[must_use] +pub fn eastern_offset_ms(epoch_ms: u64) -> i64 { + // First, determine the UTC year/month/day to find DST boundaries. + let epoch_secs = epoch_ms as i64 / 1_000; + let days_since_epoch = epoch_secs / 86_400; + + // Civil date from days since 1970-01-01 (Euclidean algorithm). + let z = days_since_epoch + 719_468; + let era = if z >= 0 { z } else { z - 146_096 } / 146_097; + let doe = (z - era * 146_097) as u32; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; + let year = yoe as i32 + (era * 400) as i32; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let month = if mp < 10 { mp + 3 } else { mp - 9 }; + let year = if month <= 2 { year + 1 } else { year }; + + let (dst_start_utc, dst_end_utc) = if year >= 2007 { + // Post-2007: second Sunday of March -> first Sunday of November. + ( + march_second_sunday_utc(year), + november_first_sunday_utc(year), + ) + } else { + // Pre-2007: first Sunday of April -> last Sunday of October. + (april_first_sunday_utc(year), october_last_sunday_utc(year)) + }; + + let epoch_ms_i64 = epoch_ms as i64; + if epoch_ms_i64 >= dst_start_utc && epoch_ms_i64 < dst_end_utc { + -4 * 3_600 * 1_000 // EDT + } else { + -5 * 3_600 * 1_000 // EST + } +} + +/// Epoch ms of the second Sunday of March at 7:00 AM UTC (= 2:00 AM EST). +#[must_use] +pub fn march_second_sunday_utc(year: i32) -> i64 { + // March 1 day-of-week, then find second Sunday. + let mar1 = civil_to_epoch_days(year, 3, 1); + // 1970-01-01 is Thursday. (days + 3) % 7 gives 0=Mon..6=Sun. 
+ let dow = ((mar1 + 3) % 7 + 7) % 7; + let days_to_first_sunday = (6 - dow + 7) % 7; // days from Mar 1 to first Sunday + let second_sunday = mar1 + days_to_first_sunday + 7; // second Sunday + second_sunday * 86_400_000 + 7 * 3_600 * 1_000 // 7:00 AM UTC = 2:00 AM EST +} + +/// Epoch ms of the first Sunday of November at 6:00 AM UTC (= 2:00 AM EDT). +#[must_use] +pub fn november_first_sunday_utc(year: i32) -> i64 { + let nov1 = civil_to_epoch_days(year, 11, 1); + let dow = ((nov1 + 3) % 7 + 7) % 7; + let days_to_first_sunday = (6 - dow + 7) % 7; + let first_sunday = nov1 + days_to_first_sunday; + first_sunday * 86_400_000 + 6 * 3_600 * 1_000 // 6:00 AM UTC = 2:00 AM EDT +} + +/// Epoch ms of the first Sunday of April at 7:00 AM UTC (= 2:00 AM EST). +/// +/// Used for pre-2007 DST start (Uniform Time Act of 1966). +#[must_use] +pub fn april_first_sunday_utc(year: i32) -> i64 { + let apr1 = civil_to_epoch_days(year, 4, 1); + let dow = ((apr1 + 3) % 7 + 7) % 7; + let days_to_first_sunday = (6 - dow + 7) % 7; + let first_sunday = apr1 + days_to_first_sunday; + first_sunday * 86_400_000 + 7 * 3_600 * 1_000 // 7:00 AM UTC = 2:00 AM EST +} + +/// Epoch ms of the last Sunday of October at 6:00 AM UTC (= 2:00 AM EDT). +/// +/// Used for pre-2007 DST end (Uniform Time Act of 1966). +#[must_use] +pub fn october_last_sunday_utc(year: i32) -> i64 { + // Start from October 31 and walk back to find the last Sunday. + let oct31 = civil_to_epoch_days(year, 10, 31); + let dow = ((oct31 + 3) % 7 + 7) % 7; // 0=Mon..6=Sun + let days_back = (dow + 1) % 7; // days back from Oct 31 to last Sunday + let last_sunday = oct31 - days_back; + last_sunday * 86_400_000 + 6 * 3_600 * 1_000 // 6:00 AM UTC = 2:00 AM EDT +} + +/// Convert civil date to days since 1970-01-01 (inverse of the Euclidean algorithm). +// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions for valid calendar dates. 
+#[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)] +#[must_use] +pub fn civil_to_epoch_days(year: i32, month: u32, day: u32) -> i64 { + let y = if month <= 2 { + i64::from(year) - 1 + } else { + i64::from(year) + }; + let m = if month <= 2 { + i64::from(month) + 9 + } else { + i64::from(month) - 3 + }; + let era = if y >= 0 { y } else { y - 399 } / 400; + let yoe = (y - era * 400) as u64; + let doy = (153 * m as u64 + 2) / 5 + u64::from(day) - 1; + let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; + era * 146_097 + doe as i64 - 719_468 +} + +/// Convert `epoch_ms` to milliseconds-of-day in Eastern Time (DST-aware). +// Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. +#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] +#[must_use] +pub fn timestamp_to_ms_of_day(epoch_ms: u64) -> i32 { + let offset = eastern_offset_ms(epoch_ms); + let local_ms = epoch_ms as i64 + offset; + (local_ms.rem_euclid(86_400_000)) as i32 +} + +/// Convert `epoch_ms` to YYYYMMDD date integer in Eastern Time (DST-aware). +// Reason: date components fit in i32; epoch_ms is in valid market data range. 
+#[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::cast_possible_truncation +)] +#[must_use] +pub fn timestamp_to_date(epoch_ms: u64) -> i32 { + let offset = eastern_offset_ms(epoch_ms); + let local_secs = (epoch_ms as i64 + offset) / 1_000; + let days = local_secs / 86400 + 719_468; + let era = if days >= 0 { days } else { days - 146_096 } / 146_097; + let doe = (days - era * 146_097) as u32; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; + let y = i64::from(yoe) + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y as i32) * 10_000 + (m as i32) * 100 + (d as i32) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + // Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. + #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] + fn timestamp_to_ms_of_day_edt() { + // 2026-04-01 09:30:00 ET (EDT, UTC-4) = 2026-04-01 13:30:00 UTC + let epoch_ms: u64 = 1_775_050_200_000; // Apr 1 2026, 13:30 UTC + let ms = timestamp_to_ms_of_day(epoch_ms); + assert_eq!(ms, 34_200_000, "9:30 AM ET in milliseconds"); + } + + #[test] + // Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. 
+ #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] + fn timestamp_to_ms_of_day_est() { + // 2026-01-15 09:30:00 ET (EST, UTC-5) = 2026-01-15 14:30:00 UTC + let epoch_ms: u64 = 1_768_487_400_000; + let ms = timestamp_to_ms_of_day(epoch_ms); + assert_eq!(ms, 34_200_000, "9:30 AM ET in milliseconds (winter)"); + } + + #[test] + fn timestamp_to_date_edt() { + let epoch_ms: u64 = 1_775_050_200_000; // Apr 1 2026, 13:30 UTC + let date = timestamp_to_date(epoch_ms); + assert_eq!(date, 20260401); + } + + #[test] + fn timestamp_to_date_est() { + let epoch_ms: u64 = 1_768_487_400_000; // Jan 15 2026, 14:30 UTC + let date = timestamp_to_date(epoch_ms); + assert_eq!(date, 20260115); + } + + #[test] + fn dst_transition_march_2026() { + // 2026 DST starts March 8 (second Sunday of March) + // Before: EST (UTC-5) at 06:59 UTC. After: EDT (UTC-4) at 07:01 UTC. + let before: u64 = 1_772_953_140_000; // Mar 8 2026, 06:59 UTC + assert_eq!(eastern_offset_ms(before), -5 * 3_600 * 1_000); + let after: u64 = 1_772_953_260_000; // Mar 8 2026, 07:01 UTC + assert_eq!(eastern_offset_ms(after), -4 * 3_600 * 1_000); + } + + #[test] + fn pre2007_dst_summer_uses_old_rules() { + // 2006: old rules apply (first Sunday April -> last Sunday October). + // 2006-07-16 16:00:00 UTC = 2006-07-16 12:00:00 EDT (summer, mid-July). + // This is well within DST under both old and new rules, so EDT (UTC-4). + let epoch_ms: u64 = 1_153_065_600_000; // Jul 16 2006, 16:00 UTC + assert_eq!( + eastern_offset_ms(epoch_ms), + -4 * 3_600 * 1_000, + "mid-July 2006 should be EDT under old DST rules" + ); + } + + #[test] + fn pre2007_est_before_april_dst_start() { + // 2006: old rules — DST starts first Sunday of April (April 2, 2006). + // 2006-02-15 15:00:00 UTC = 2006-02-15 10:00:00 EST (winter, mid-Feb). 
+ let epoch_ms: u64 = 1_140_015_600_000; // Feb 15 2006, 15:00 UTC + assert_eq!( + eastern_offset_ms(epoch_ms), + -5 * 3_600 * 1_000, + "mid-February 2006 should be EST under old DST rules" + ); + } +} diff --git a/crates/thetadatadx/Cargo.toml b/crates/thetadatadx/Cargo.toml index 101fb0c7..a257b407 100644 --- a/crates/thetadatadx/Cargo.toml +++ b/crates/thetadatadx/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx" -version = "8.0.33" +version = "8.0.34" edition.workspace = true rust-version.workspace = true authors.workspace = true @@ -40,7 +40,7 @@ frames = ["polars", "arrow"] live-tests = [] [dependencies] -tdbe = { version = "0.12.9", path = "../tdbe" } +tdbe = { version = "0.12.10", path = "../tdbe" } # gRPC + protobuf (tonic 0.14 extracted prost codec into tonic-prost) tonic = { version = "=0.14.5", features = ["tls-ring", "tls-native-roots", "channel", "transport"] } @@ -141,7 +141,7 @@ prost-build = "=0.14.3" regex = "1.12.3" toml = "1.1.2" serde = { version = "1.0.228", features = ["derive"] } -tdbe = { version = "0.12.9", path = "../tdbe" } +tdbe = { version = "0.12.10", path = "../tdbe" } [[bench]] name = "bench_decode" diff --git a/crates/thetadatadx/build_support/ticks/templates/parser/eod_date.rs.tmpl b/crates/thetadatadx/build_support/ticks/templates/parser/eod_date.rs.tmpl index 73bb9c93..9a52eac4 100644 --- a/crates/thetadatadx/build_support/ticks/templates/parser/eod_date.rs.tmpl +++ b/crates/thetadatadx/build_support/ticks/templates/parser/eod_date.rs.tmpl @@ -6,7 +6,7 @@ match dv.data_type.as_ref() { Some(crate::proto::data_value::DataType::Number(n)) => Ok(*n as i32), Some(crate::proto::data_value::DataType::Price(p)) => Ok(p.value), - Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(crate::decode::timestamp_to_date(ts.epoch_ms)), + Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(tdbe::time::timestamp_to_date(ts.epoch_ms)), Some(crate::proto::data_value::DataType::NullValue(_)) => Ok(0), None => 
Err(DecodeError::TypeMismatch { column: idx, diff --git a/crates/thetadatadx/build_support/ticks/templates/parser/eod_num.rs.tmpl b/crates/thetadatadx/build_support/ticks/templates/parser/eod_num.rs.tmpl index 202ba1eb..d152686e 100644 --- a/crates/thetadatadx/build_support/ticks/templates/parser/eod_num.rs.tmpl +++ b/crates/thetadatadx/build_support/ticks/templates/parser/eod_num.rs.tmpl @@ -6,7 +6,7 @@ match dv.data_type.as_ref() { Some(crate::proto::data_value::DataType::Number(n)) => Ok(*n as i32), Some(crate::proto::data_value::DataType::Price(p)) => Ok(p.value), - Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(crate::decode::timestamp_to_ms_of_day(ts.epoch_ms)), + Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(tdbe::time::timestamp_to_ms_of_day(ts.epoch_ms)), Some(crate::proto::data_value::DataType::NullValue(_)) => Ok(0), None => Err(DecodeError::TypeMismatch { column: idx, diff --git a/crates/thetadatadx/build_support/ticks/templates/parser/eod_num64.rs.tmpl b/crates/thetadatadx/build_support/ticks/templates/parser/eod_num64.rs.tmpl index e5615273..f22a9a58 100644 --- a/crates/thetadatadx/build_support/ticks/templates/parser/eod_num64.rs.tmpl +++ b/crates/thetadatadx/build_support/ticks/templates/parser/eod_num64.rs.tmpl @@ -6,7 +6,7 @@ match dv.data_type.as_ref() { Some(crate::proto::data_value::DataType::Number(n)) => Ok(*n), Some(crate::proto::data_value::DataType::Price(p)) => Ok(i64::from(p.value)), - Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(i64::from(crate::decode::timestamp_to_ms_of_day(ts.epoch_ms))), + Some(crate::proto::data_value::DataType::Timestamp(ts)) => Ok(i64::from(tdbe::time::timestamp_to_ms_of_day(ts.epoch_ms))), Some(crate::proto::data_value::DataType::NullValue(_)) => Ok(0), None => Err(DecodeError::TypeMismatch { column: idx, diff --git a/crates/thetadatadx/src/decode.rs b/crates/thetadatadx/src/decode.rs deleted file mode 100644 index 3da82e35..00000000 --- 
a/crates/thetadatadx/src/decode.rs +++ /dev/null @@ -1,2177 +0,0 @@ -use std::cell::RefCell; - -use crate::error::Error; -use crate::proto; -use tdbe::types::tick::{ - CalendarDay, EodTick, GreeksAllTick, GreeksFirstOrderTick, GreeksSecondOrderTick, - GreeksThirdOrderTick, InterestRateTick, IvTick, MarketValueTick, OhlcTick, OpenInterestTick, - OptionContract, PriceTick, QuoteTick, TradeQuoteTick, TradeTick, -}; -use thiserror::Error as ThisError; - -/// Per-cell decode failure. Produced by the `row_*` helpers when a cell does -/// not match the column's declared type, or when the requested column index is -/// past the end of the row. Mirrors the Java terminal's `IllegalArgumentException` -/// path in `PojoMessageUtils.convert`. -#[derive(Debug, ThisError, PartialEq, Eq)] -pub enum DecodeError { - /// Cell exists but its `DataType` variant does not match the declared - /// schema for this column. - #[error("column {column}: expected {expected}, got {observed}")] - TypeMismatch { - column: usize, - expected: &'static str, - observed: &'static str, - }, - /// Row has fewer cells than the requested column index. - #[error("column {column}: missing cell")] - MissingCell { column: usize }, - /// A required header (declared in `tick_schema.toml` under - /// `required = [...]`) is absent from a non-empty `DataTable`. Emitted by - /// the generated parsers when the server has added or renamed the column — - /// surfacing this as an error is the only way to prevent silent data loss - /// when the upstream schema drifts (see `HEADER_ALIASES` for known - /// synonyms). Empty `DataTable`s (no rows) still return `Ok(vec![])` - /// because "no trades today" is a legitimate outcome. 
- #[error( - "required column `{header}` missing from {rows}-row DataTable; \ - available headers: {available}" - )] - MissingRequiredHeader { - header: &'static str, - rows: usize, - available: String, - }, - /// A mid-stream gRPC chunk carries a header set that does not match the - /// header set established by the first chunk. The stream accumulator - /// used to silently retain the first header set and accumulate rows - /// from every chunk underneath it, which would transparently corrupt - /// a row set if the server's wire schema changed mid-response. This - /// variant surfaces the drift instead of hiding it. - #[error( - "chunk {chunk_index} headers drifted from first-chunk schema; \ - first: [{first}]; chunk: [{chunk}]" - )] - ChunkHeaderDrift { - chunk_index: usize, - first: String, - chunk: String, - }, -} - -/// Name the `DataType` variant for error messages. `None` is treated as a -/// missing `data_type` oneof (protobuf cell with no variant set). -pub(crate) fn observed_name(dt: Option<&proto::data_value::DataType>) -> &'static str { - match dt { - Some(proto::data_value::DataType::Number(_)) => "Number", - Some(proto::data_value::DataType::Text(_)) => "Text", - Some(proto::data_value::DataType::Price(_)) => "Price", - Some(proto::data_value::DataType::Timestamp(_)) => "Timestamp", - Some(proto::data_value::DataType::NullValue(_)) => "NullValue", - None => "Unset", - } -} - -/// Header aliases: v3 MDDS uses different column names than the tick schema. -/// This maps schema names to their v3 equivalents so parsers work with both. -/// -/// Validated against a real v3 MDDS response capture (see -/// `tests/fixtures/captures/`). Each entry is `(schema_name, server_name)`: -/// `find_header("ms_of_day", h)` returns the index of the first matching -/// server column in `h`. -const HEADER_ALIASES: &[(&str, &str)] = &[ - // Generic time column: MDDS sends a proto `Timestamp`, the tick schema - // models it as an i32 ms-of-day. 
`row_number` handles the conversion. - ("ms_of_day", "timestamp"), - ("ms_of_day", "created"), - // Combined trade + quote responses split the two time columns into - // `trade_timestamp` (the trade side → `ms_of_day`) and `quote_timestamp` - // (the quote side → `quote_ms_of_day`). Without these aliases the - // `TradeQuoteTick` parser falls through the required-header guard and - // produces an empty Vec on ~1M-row responses (P11). - ("ms_of_day", "trade_timestamp"), - ("quote_ms_of_day", "quote_timestamp"), - ("ms_of_day2", "timestamp2"), - ("ms_of_day2", "last_trade"), - ("date", "timestamp"), - ("date", "created"), - ("date", "trade_timestamp"), - // option_list_contracts returns "symbol" where the schema says "root" - ("root", "symbol"), - // v3 uses "implied_vol" where the schema says "implied_volatility" - ("implied_volatility", "implied_vol"), - // The vendor's per-order Greeks endpoints (`option_*_greeks_*_order`) - // and the `_greeks_all` / `_greeks_eod` endpoints publish the - // underlying snapshot timestamp as `underlying_timestamp`. The tick - // schema models it as `underlying_ms_of_day` so the wire conversion - // (Timestamp -> ms-of-day) flows through the standard `row_number` - // path without a per-tick parser branch. - ("underlying_ms_of_day", "underlying_timestamp"), -]; - -/// Helper: find a column index by name, with alias fallback. -/// -/// The v3 MDDS server uses `timestamp` where the tick schema says `ms_of_day`. -/// This function checks the primary name first, then falls back to known aliases. -/// -/// Returns `None` silently when the header is absent — required-header -/// guards in the generated parsers surface a typed -/// [`Error::MissingRequiredHeader`] for the must-have columns; optional -/// columns missing from a subset response (e.g. `option_snapshot_greeks_third_order` -/// returning only the third-order Greek columns from the `GreeksTick` -/// union schema) are by design. 
Header drift can be observed at the -/// `trace` level via `RUST_LOG=thetadatadx=trace`. -fn find_header(headers: &[&str], name: &str) -> Option { - // Try exact match first. - if let Some(pos) = headers.iter().position(|&s| s == name) { - return Some(pos); - } - // Try aliases. - for &(schema_name, server_name) in HEADER_ALIASES { - if name == schema_name { - if let Some(pos) = headers.iter().position(|&s| s == server_name) { - return Some(pos); - } - } - } - tracing::trace!( - header = name, - "column header not present in DataTable (optional or subset response)" - ); - None -} - -/// Eastern Time UTC offset in milliseconds for a given `epoch_ms`. -/// -/// US DST rules changed over time: -/// -/// **2007-onward** (Energy Policy Act of 2005): -/// - EDT (UTC-4): second Sunday of March at 2:00 AM local -> first Sunday of November at 2:00 AM local -/// - EST (UTC-5): rest of the year -/// -/// **Before 2007** (Uniform Time Act of 1966): -/// - EDT (UTC-4): first Sunday of April at 2:00 AM local -> last Sunday of October at 2:00 AM local -/// - EST (UTC-5): rest of the year -/// -/// We compute the transition points in UTC and compare. This avoids -/// external timezone crate dependencies while being correct for all -/// dates with US Eastern Time DST rules. -// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions for valid epoch timestamps. -#[allow( - clippy::cast_possible_wrap, - clippy::cast_sign_loss, - clippy::cast_possible_truncation -)] -fn eastern_offset_ms(epoch_ms: u64) -> i64 { - // First, determine the UTC year/month/day to find DST boundaries. - let epoch_secs = epoch_ms as i64 / 1_000; - let days_since_epoch = epoch_secs / 86_400; - - // Civil date from days since 1970-01-01 (Euclidean algorithm). 
- let z = days_since_epoch + 719_468; - let era = if z >= 0 { z } else { z - 146_096 } / 146_097; - let doe = (z - era * 146_097) as u32; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; - let year = yoe as i32 + (era * 400) as i32; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let month = if mp < 10 { mp + 3 } else { mp - 9 }; - let year = if month <= 2 { year + 1 } else { year }; - - let (dst_start_utc, dst_end_utc) = if year >= 2007 { - // Post-2007: second Sunday of March -> first Sunday of November. - ( - march_second_sunday_utc(year), - november_first_sunday_utc(year), - ) - } else { - // Pre-2007: first Sunday of April -> last Sunday of October. - (april_first_sunday_utc(year), october_last_sunday_utc(year)) - }; - - let epoch_ms_i64 = epoch_ms as i64; - if epoch_ms_i64 >= dst_start_utc && epoch_ms_i64 < dst_end_utc { - -4 * 3_600 * 1_000 // EDT - } else { - -5 * 3_600 * 1_000 // EST - } -} - -/// Epoch ms of the second Sunday of March at 7:00 AM UTC (= 2:00 AM EST). -fn march_second_sunday_utc(year: i32) -> i64 { - // March 1 day-of-week, then find second Sunday. - let mar1 = civil_to_epoch_days(year, 3, 1); - // 1970-01-01 is Thursday. (days + 3) % 7 gives 0=Mon..6=Sun. - let dow = ((mar1 + 3) % 7 + 7) % 7; - let days_to_first_sunday = (6 - dow + 7) % 7; // days from Mar 1 to first Sunday - let second_sunday = mar1 + days_to_first_sunday + 7; // second Sunday - second_sunday * 86_400_000 + 7 * 3_600 * 1_000 // 7:00 AM UTC = 2:00 AM EST -} - -/// Epoch ms of the first Sunday of November at 6:00 AM UTC (= 2:00 AM EDT). 
-fn november_first_sunday_utc(year: i32) -> i64 { - let nov1 = civil_to_epoch_days(year, 11, 1); - let dow = ((nov1 + 3) % 7 + 7) % 7; - let days_to_first_sunday = (6 - dow + 7) % 7; - let first_sunday = nov1 + days_to_first_sunday; - first_sunday * 86_400_000 + 6 * 3_600 * 1_000 // 6:00 AM UTC = 2:00 AM EDT -} - -/// Epoch ms of the first Sunday of April at 7:00 AM UTC (= 2:00 AM EST). -/// -/// Used for pre-2007 DST start (Uniform Time Act of 1966). -fn april_first_sunday_utc(year: i32) -> i64 { - let apr1 = civil_to_epoch_days(year, 4, 1); - let dow = ((apr1 + 3) % 7 + 7) % 7; - let days_to_first_sunday = (6 - dow + 7) % 7; - let first_sunday = apr1 + days_to_first_sunday; - first_sunday * 86_400_000 + 7 * 3_600 * 1_000 // 7:00 AM UTC = 2:00 AM EST -} - -/// Epoch ms of the last Sunday of October at 6:00 AM UTC (= 2:00 AM EDT). -/// -/// Used for pre-2007 DST end (Uniform Time Act of 1966). -fn october_last_sunday_utc(year: i32) -> i64 { - // Start from October 31 and walk back to find the last Sunday. - let oct31 = civil_to_epoch_days(year, 10, 31); - let dow = ((oct31 + 3) % 7 + 7) % 7; // 0=Mon..6=Sun - let days_back = (dow + 1) % 7; // days back from Oct 31 to last Sunday - let last_sunday = oct31 - days_back; - last_sunday * 86_400_000 + 6 * 3_600 * 1_000 // 6:00 AM UTC = 2:00 AM EDT -} - -/// Convert civil date to days since 1970-01-01 (inverse of the Euclidean algorithm). -// Reason: the Euclidean date algorithm uses intentional signed/unsigned conversions for valid calendar dates. 
-#[allow(clippy::cast_sign_loss, clippy::cast_possible_wrap)] -fn civil_to_epoch_days(year: i32, month: u32, day: u32) -> i64 { - let y = if month <= 2 { - i64::from(year) - 1 - } else { - i64::from(year) - }; - let m = if month <= 2 { - i64::from(month) + 9 - } else { - i64::from(month) - 3 - }; - let era = if y >= 0 { y } else { y - 399 } / 400; - let yoe = (y - era * 400) as u64; - let doy = (153 * m as u64 + 2) / 5 + u64::from(day) - 1; - let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; - era * 146_097 + doe as i64 - 719_468 -} - -/// Convert `epoch_ms` to milliseconds-of-day in Eastern Time (DST-aware). -// Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. -#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] -pub(crate) fn timestamp_to_ms_of_day(epoch_ms: u64) -> i32 { - let offset = eastern_offset_ms(epoch_ms); - let local_ms = epoch_ms as i64 + offset; - (local_ms.rem_euclid(86_400_000)) as i32 -} - -/// Convert `epoch_ms` to YYYYMMDD date integer in Eastern Time (DST-aware). -// Reason: date components fit in i32; epoch_ms is in valid market data range. -#[allow( - clippy::cast_possible_wrap, - clippy::cast_sign_loss, - clippy::cast_possible_truncation -)] -pub(crate) fn timestamp_to_date(epoch_ms: u64) -> i32 { - let offset = eastern_offset_ms(epoch_ms); - let local_secs = (epoch_ms as i64 + offset) / 1_000; - let days = local_secs / 86400 + 719_468; - let era = if days >= 0 { days } else { days - 146_096 } / 146_097; - let doe = (days - era * 146_097) as u32; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; - let y = i64::from(yoe) + era * 400; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let d = doy - (153 * mp + 2) / 5 + 1; - let m = if mp < 10 { mp + 3 } else { mp - 9 }; - let y = if m <= 2 { y + 1 } else { y }; - (y as i32) * 10_000 + (m as i32) * 100 + (d as i32) -} - -/// Extract a date (YYYYMMDD) from a `Number` or `Timestamp` cell, strictly. 
-/// -/// Used by generated parsers when the `date` field maps to a `timestamp` column. -/// `Number` carries the date already in YYYYMMDD form; `Timestamp` is converted -/// to an Eastern-Time YYYYMMDD integer. `NullValue` yields `Ok(None)`; any -/// other type yields `Err(TypeMismatch)`. -/// -/// # Errors -/// -/// Returns [`DecodeError::TypeMismatch`] if the cell is neither a `Number`, -/// `Timestamp`, nor `NullValue` — including the case where the `DataValue` -/// arrived with its `data_type` oneof unset (`observed: "Unset"`), which is a -/// wire-protocol anomaly we fail loud on. Returns [`DecodeError::MissingCell`] -/// only when the row has fewer cells than `idx` (index out of bounds). -// Reason: number values from protobuf fit in i32 for date/integer fields. -#[allow(clippy::cast_possible_truncation)] -pub(crate) fn row_date(row: &proto::DataValueList, idx: usize) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as i32)), - Some(proto::data_value::DataType::Timestamp(ts)) => { - Ok(Some(timestamp_to_date(ts.epoch_ms))) - } - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Number|Timestamp", - observed: observed_name(other), - }), - } -} - -thread_local! { - /// Reusable zstd decompressor **and** output buffer — avoids allocating both - /// a fresh decompressor context and a fresh `Vec` on every call. - /// - /// The decompressor context (~128 KB of zstd internal state) is recycled, and - /// the output buffer retains its capacity across calls so that repeated - /// decompressions of similar-sized payloads hit no allocator at all. - /// - /// We use `decompress_to_buffer` which writes into the pre-existing Vec - /// without reallocating when capacity is sufficient. 
The final `.clone()` - /// is necessary since we return ownership, but the internal buffer capacity - /// persists across calls — the key win is avoiding repeated alloc/dealloc - /// cycles for the working buffer. - static ZSTD_STATE: RefCell<(zstd::bulk::Decompressor<'static>, Vec)> = RefCell::new(( - // Infallible in practice: zstd decompressor creation only fails on OOM. - // thread_local! does not support Result, so unwrap is intentional here. - zstd::bulk::Decompressor::new().expect("zstd decompressor creation failed (possible OOM)"), - Vec::with_capacity(1024 * 1024), // 1 MB initial capacity - )); -} - -/// Decompress a `ResponseData` payload. Returns the raw protobuf bytes of the `DataTable`. -/// -/// # Unknown compression algorithms -/// -/// Prost's `.algo()` silently maps unknown enum values to the default (None=0), -/// so we check the raw i32 to detect truly unknown algorithms. Without this, -/// an unrecognized algorithm would be treated as uncompressed, producing garbage. -/// -/// # Buffer recycling -/// -/// Uses a thread-local `(Decompressor, Vec)` pair. The `Vec` retains its -/// capacity across calls, so repeated decompressions of similar-sized payloads -/// avoid hitting the allocator for the working buffer. The returned `Vec` -/// is a clone (we must return ownership), but the internal slab persists. -/// # Errors -/// -/// Returns [`Error::Decompress`] if the compression algorithm is unknown or -/// zstd decompression fails. -// Reason: original_size is a protobuf u64 that fits in usize for valid payloads. 
-#[allow(clippy::cast_possible_truncation)] -pub fn decompress_response(response: &proto::ResponseData) -> Result, Error> { - let algo_raw = response - .compression_description - .as_ref() - .map_or(0, |cd| cd.algo); - - match proto::CompressionAlgo::try_from(algo_raw) { - Ok(proto::CompressionAlgo::None) => Ok(response.compressed_data.clone()), - Ok(proto::CompressionAlgo::Zstd) => { - let original_size = usize::try_from(response.original_size).unwrap_or(0); - ZSTD_STATE.with(|cell| { - let (ref mut dec, ref mut buf) = *cell.borrow_mut(); - buf.clear(); - buf.resize(original_size, 0); - let n = dec - .decompress_to_buffer(&response.compressed_data, buf) - .map_err(|e| Error::Decompress(e.to_string()))?; - buf.truncate(n); - Ok(buf.clone()) - }) - } - _ => Err(Error::Decompress(format!( - "unknown compression algorithm: {algo_raw}" - ))), - } -} - -/// Decode a `ResponseData` into a `DataTable`. -/// -/// # Errors -/// -/// Returns [`Error::Decompress`] if decompression fails or [`Error::Decode`] -/// if protobuf deserialization fails. -pub fn decode_data_table(response: &proto::ResponseData) -> Result { - let bytes = decompress_response(response)?; - let table: proto::DataTable = - prost::Message::decode(bytes.as_slice()).map_err(|e| Error::Decode(e.to_string()))?; - Ok(table) -} - -/// Extract a column of i64 values from a `DataTable` by header name. -#[must_use] -pub fn extract_number_column(table: &proto::DataTable, header: &str) -> Vec> { - let Some(col_idx) = table.headers.iter().position(|h| h == header) else { - return vec![]; - }; - - table - .data_table - .iter() - .map(|row| { - row.values - .get(col_idx) - .and_then(|dv| dv.data_type.as_ref()) - .and_then(|dt| match dt { - proto::data_value::DataType::Number(n) => Some(*n), - _ => None, - }) - }) - .collect() -} - -/// Extract a column of string values from a `DataTable` by header name. 
-#[must_use] -pub fn extract_text_column(table: &proto::DataTable, header: &str) -> Vec> { - let Some(col_idx) = table.headers.iter().position(|h| h == header) else { - return vec![]; - }; - - table - .data_table - .iter() - .map(|row| { - row.values - .get(col_idx) - .and_then(|dv| dv.data_type.as_ref()) - .and_then(|dt| match dt { - proto::data_value::DataType::Text(s) => Some(s.clone()), - proto::data_value::DataType::Number(n) => Some(n.to_string()), - proto::data_value::DataType::Price(p) => { - Some(format!("{}", tdbe::Price::new(p.value, p.r#type).to_f64())) - } - _ => None, - }) - }) - .collect() -} - -/// Extract a column of Price values from a `DataTable` by header name. -#[must_use] -pub fn extract_price_column(table: &proto::DataTable, header: &str) -> Vec> { - let Some(col_idx) = table.headers.iter().position(|h| h == header) else { - return vec![]; - }; - - table - .data_table - .iter() - .map(|row| { - row.values - .get(col_idx) - .and_then(|dv| dv.data_type.as_ref()) - .and_then(|dt| match dt { - proto::data_value::DataType::Price(p) => { - Some(tdbe::Price::new(p.value, p.r#type)) - } - _ => None, - }) - }) - .collect() -} - -/// Decode an `i32`-valued cell with Java-matching strict semantics. -/// -/// Accepts: -/// - `Number(n)` → `Ok(Some(n as i32))`. -/// - `Timestamp(ts)` → `Ok(Some(ms_of_day))` — v3 MDDS sends time columns as -/// proto `Timestamp`; the parser expects milliseconds-of-day in Eastern Time. -/// - `NullValue` → `Ok(None)`, matching Java `null` return. -/// -/// Any other variant produces [`DecodeError::TypeMismatch`], including the -/// case where the `DataValue` arrived with its `data_type` oneof unset -/// (`observed: "Unset"`) — a wire anomaly we fail loud on. A row shorter than -/// `idx` (index out of bounds) produces [`DecodeError::MissingCell`]. -/// -/// # Errors -/// -/// See variant list above. -// Reason: protocol-defined integer widths from Java FPSS specification. 
-#[allow(clippy::cast_possible_truncation)] -pub(crate) fn row_number( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as i32)), - Some(proto::data_value::DataType::Timestamp(ts)) => { - Ok(Some(timestamp_to_ms_of_day(ts.epoch_ms))) - } - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Number|Timestamp", - observed: observed_name(other), - }), - } -} - -/// Extract raw price value from a `Price` cell (test-only helper). -/// -/// `Price(p)` → `Ok(Some(p.value))`; `NullValue` → `Ok(None)`; other types -/// error. Missing cell errors. -/// -/// # Errors -/// -/// See [`row_number`]. -#[cfg(test)] -#[allow(clippy::cast_possible_truncation)] -fn row_price_value(row: &proto::DataValueList, idx: usize) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Price(p)) => Ok(Some(p.value)), - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Price", - observed: observed_name(other), - }), - } -} - -/// Extract raw price type from a `Price` cell (test-only helper). -/// -/// # Errors -/// -/// See [`row_price_value`]. 
-#[cfg(test)] -#[allow(clippy::cast_possible_truncation)] -fn row_price_type(row: &proto::DataValueList, idx: usize) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Price(p)) => Ok(Some(p.r#type)), - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Price", - observed: observed_name(other), - }), - } -} - -/// Decode a price-valued cell to `f64`, using the cell's own `price_type`. -/// -/// Accepts both `Price` (the schema type) and `Number` — v3 MDDS occasionally -/// sends whole-dollar quantities as plain `Number` cells where the schema -/// would otherwise expect `Price`. `NullValue` returns `Ok(None)`. -/// -/// # Errors -/// -/// Errors on any other cell type or missing cell. -// Reason: protocol-defined integer widths from Java FPSS specification. -#[allow(clippy::cast_possible_truncation)] -pub(crate) fn row_price_f64( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Price(p)) => Ok(Some( - tdbe::types::price::Price::new(p.value, p.r#type).to_f64(), - )), - Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as f64)), - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Price|Number", - observed: observed_name(other), - }), - } -} - -/// Decode a text-valued cell. -/// -/// `Text(s)` → `Ok(Some(s))`, `NullValue` → `Ok(None)`. -/// -/// # Errors -/// -/// Errors on any other cell type or missing cell. 
-pub(crate) fn row_text( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Text(s)) => Ok(Some(s.clone())), - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Text", - observed: observed_name(other), - }), - } -} - -/// Decode an `i64`-valued cell. -/// -/// `Number(n)` → `Ok(Some(n))`; `Price(p)` → scaled with i64-native -/// arithmetic (no f64 hop), so values past `2^53` round-trip bit-exact; -/// `NullValue` → `Ok(None)`. -/// -/// Used by the generated parsers for schema columns typed `i64` — added -/// with the EodTick `volume`/`count` widening (where on high-volume -/// symbols the values exceed `i32::MAX`). -/// -/// `price_type` is clamped to `0..=19` to match -/// [`tdbe::types::price::Price::new`], so the same wire cell decodes -/// identically through this function and [`row_price_f64`]. -/// -/// # Errors -/// -/// Returns `DecodeError::TypeMismatch` for any other cell variant. Returns -/// `DecodeError::MissingCell` for an out-of-bounds column index. Under the -/// clamped `0..=19` price-type contract, scale-up cannot overflow `i64` -/// (max product is `i32::MAX * 10^9 ≈ 2.15e18`, well under `i64::MAX`). -pub(crate) fn row_number_i64( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - match dv.data_type.as_ref() { - Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n)), - Some(proto::data_value::DataType::Price(p)) => { - // Vendor convention: real_value = value * 10^(type - 10). - // Clamp `type` to 0..=19 to match `tdbe::Price::new`, so the - // same wire cell decodes identically through `row_price_f64` - // and `row_number_i64`. 
Positive exp scales up; negative exp - // scales down. v == 0 short-circuits to 0 so a zero price - // never trips the scale-up overflow guard. - let v = i64::from(p.value); - if v == 0 { - return Ok(Some(0)); - } - let price_type = p.r#type.clamp(0, 19); - let exp = price_type - 10; - // After clamping, exp ∈ [-10, 9]. Scale-up: i32::MAX * 10^9 - // ≈ 2.147e18 < i64::MAX (≈ 9.22e18), so checked_mul cannot - // overflow. checked_mul preserves the contract anyway. - let scaled = if exp >= 0 { - 10i64 - .checked_pow(exp.unsigned_abs()) - .and_then(|m| v.checked_mul(m)) - } else { - Some(v / 10i64.pow(exp.unsigned_abs())) - }; - match scaled { - Some(n) => Ok(Some(n)), - None => Err(DecodeError::TypeMismatch { - column: idx, - expected: "i64-fitting Price", - observed: "Price overflowing i64", - }), - } - } - Some(proto::data_value::DataType::NullValue(_)) => Ok(None), - other => Err(DecodeError::TypeMismatch { - column: idx, - expected: "Number|Price", - observed: observed_name(other), - }), - } -} - -// Generated code -- parser functions from tick_schema.toml by build.rs. -#[allow(clippy::pedantic)] // Reason: auto-generated parser code, not under our control. -mod decode_generated { - use super::*; - include!(concat!(env!("OUT_DIR"), "/decode_generated.rs")); -} -pub use decode_generated::*; - -/// Borrow the cell at `idx`, returning an error if the row is too short. -fn cell_type( - row: &proto::DataValueList, - idx: usize, -) -> Result, DecodeError> { - let Some(dv) = row.values.get(idx) else { - return Err(DecodeError::MissingCell { column: idx }); - }; - Ok(dv.data_type.as_ref()) -} - -/// Hand-written parser for `OptionContract` that handles the v3 server's -/// text-formatted fields (expiration as ISO date, right as "PUT"/"CALL"). -/// -/// The `expiration` and `right` columns legitimately arrive as either `Number` -/// or `Text` depending on the upstream version, so the parser dispatches on -/// the cell's own type rather than coalescing silently. 
Mismatched types -/// propagate as [`DecodeError::TypeMismatch`]. -/// -/// # Errors -/// -/// Returns [`DecodeError`] on type mismatch or missing cell. -pub fn parse_option_contracts_v3( - table: &crate::proto::DataTable, -) -> Result, DecodeError> { - let h: Vec<&str> = table - .headers - .iter() - .map(std::string::String::as_str) - .collect(); - - // Same schema-drift guard as the generated parsers: "no contracts today" - // is legitimate, but a rows-present response missing the required `root` - // column is a silent data-loss trap. The wire column is still named - // `root` (or `symbol` via the v3 alias in `decode::HEADER_ALIASES`); the - // `symbol` binding here is the public-API field name documented in the - // v3 vendor migration guide. - let symbol_idx = match find_header(&h, "root") { - Some(i) => i, - None => { - if table.data_table.is_empty() { - return Ok(vec![]); - } - return Err(DecodeError::MissingRequiredHeader { - header: "root", - rows: table.data_table.len(), - available: h.join(","), - }); - } - }; - let exp_idx = find_header(&h, "expiration"); - let strike_idx = find_header(&h, "strike"); - let right_idx = find_header(&h, "right"); - - table - .data_table - .iter() - .map(|row| { - let symbol = row_text(row, symbol_idx)?.unwrap_or_default(); - - // Expiration: `Number` carries YYYYMMDD directly; `Text` carries - // an ISO "2026-04-13" that we parse here. `NullValue` → 0 (legit - // null, coalesce). An unset oneof is a wire anomaly → TypeMismatch. - let expiration = match exp_idx { - Some(i) => match cell_type(row, i)? 
{ - Some(proto::data_value::DataType::Number(n)) => *n as i32, - Some(proto::data_value::DataType::Text(s)) => parse_iso_date(s), - Some(proto::data_value::DataType::NullValue(_)) => 0, - None => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Text", - observed: "Unset", - }); - } - other => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Text", - observed: observed_name(other), - }); - } - }, - None => 0, - }; - - let strike = match strike_idx { - Some(i) => row_price_f64(row, i)?.unwrap_or(0.0), - None => 0.0, - }; - - // Right: `Number` carries the ASCII code directly; `Text` carries - // "PUT"/"CALL"/"P"/"C". `NullValue` / unknown text → 0. An unset - // oneof is a wire anomaly → TypeMismatch. - let right = match right_idx { - Some(i) => match cell_type(row, i)? { - Some(proto::data_value::DataType::Number(n)) => *n as i32, - Some(proto::data_value::DataType::Text(s)) => match s.as_str() { - "CALL" | "C" => 67, // ASCII 'C' - "PUT" | "P" => 80, // ASCII 'P' - _ => 0, - }, - Some(proto::data_value::DataType::NullValue(_)) => 0, - None => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Text", - observed: "Unset", - }); - } - other => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Text", - observed: observed_name(other), - }); - } - }, - None => 0, - }; - - Ok(OptionContract { - symbol, - expiration, - strike, - right, - }) - }) - .collect() -} - -/// Parse an ISO date string "2026-04-13" to YYYYMMDD integer 20260413. -// Reason: date parsing with known-safe integer ranges. 
-#[allow(clippy::cast_possible_truncation, clippy::missing_panics_doc)] -pub(crate) fn parse_iso_date(s: &str) -> i32 { - // Fast path: already numeric (YYYYMMDD) - if let Ok(n) = s.parse::() { - return n; - } - // ISO format: YYYY-MM-DD - let parts: Vec<&str> = s.split('-').collect(); - if parts.len() == 3 { - if let (Ok(y), Ok(m), Ok(d)) = ( - parts[0].parse::(), - parts[1].parse::(), - parts[2].parse::(), - ) { - return y * 10_000 + m * 100 + d; - } - } - 0 -} - -/// Parse a time string "HH:MM:SS" to milliseconds from midnight. -fn parse_time_text(s: &str) -> i32 { - let parts: Vec<&str> = s.split(':').collect(); - if parts.len() == 3 { - if let (Ok(h), Ok(m), Ok(sec)) = ( - parts[0].parse::(), - parts[1].parse::(), - parts[2].parse::(), - ) { - return (h * 3_600 + m * 60 + sec) * 1_000; - } - } - 0 -} - -/// Calendar day status constants. -/// -/// The v3 MDDS server sends a `type` column with text values. We map them to -/// integer constants for the `CalendarDay.status` field: -/// -/// | Server text | Constant | Meaning | -/// |----------------|----------|-----------------------------------| -/// | `"open"` | `0` | Normal trading day | -/// | `"early_close"`| `1` | Early close (e.g. day after Thanksgiving) | -/// | `"full_close"` | `2` | Market closed (holiday) | -/// | `"weekend"` | `3` | Weekend | -/// | (unknown) | `-1` | Unrecognized status text | -pub const CALENDAR_STATUS_OPEN: i32 = 0; -pub const CALENDAR_STATUS_EARLY_CLOSE: i32 = 1; -pub const CALENDAR_STATUS_FULL_CLOSE: i32 = 2; -pub const CALENDAR_STATUS_WEEKEND: i32 = 3; -pub const CALENDAR_STATUS_UNKNOWN: i32 = -1; - -/// Map a v3 calendar `type` text to `(is_open, status)`. 
-fn calendar_type_text(s: &str) -> (i32, i32) { - match s { - "open" => (1, CALENDAR_STATUS_OPEN), - "early_close" => (1, CALENDAR_STATUS_EARLY_CLOSE), - "full_close" => (0, CALENDAR_STATUS_FULL_CLOSE), - "weekend" => (0, CALENDAR_STATUS_WEEKEND), - _ => (0, CALENDAR_STATUS_UNKNOWN), - } -} - -/// Hand-written parser for `CalendarDay` that handles the v3 server's -/// text-formatted fields. -/// -/// The v3 MDDS server sends calendar data with different column names and types -/// than the generated parser expects: -/// -/// | Schema field | Server header | Server type | Mapping | -/// |--------------|---------------|-------------|---------------------------------------| -/// | `date` | `date` | Text | "2025-01-01" -> 20250101 | -/// | `is_open` | `type` | Text | "`open"/"early_close`" -> 1, else -> 0 | -/// | `open_time` | `open` | Text / Null | "09:30:00" -> 34200000 ms | -/// | `close_time` | `close` | Text / Null | "16:00:00" -> 57600000 ms | -/// | `status` | `type` | Text | See [`CALENDAR_STATUS_OPEN`] etc. | -/// -/// Note: `calendar_on_date` and `calendar_open_today` omit the `date` column. -/// Each column dispatches on the cell's own type rather than coalescing -/// silently — mismatched types propagate as [`DecodeError::TypeMismatch`]. -/// -/// # Errors -/// -/// Returns [`DecodeError`] on type mismatch or missing cell. -pub fn parse_calendar_days_v3( - table: &crate::proto::DataTable, -) -> Result, DecodeError> { - let h: Vec<&str> = table - .headers - .iter() - .map(std::string::String::as_str) - .collect(); - - let date_idx = h.iter().position(|&s| s == "date"); - let type_idx = h.iter().position(|&s| s == "type"); - let open_idx = h.iter().position(|&s| s == "open"); - let close_idx = h.iter().position(|&s| s == "close"); - - table - .data_table - .iter() - .map(|row| { - // date: Number carries YYYYMMDD, Timestamp converts to ET date, - // Text "2025-01-01" parses to YYYYMMDD. `NullValue` → 0 (legit - // null). 
Unset oneof is a wire anomaly → TypeMismatch. - let date = match date_idx { - Some(i) => match cell_type(row, i)? { - Some(proto::data_value::DataType::Number(n)) => *n as i32, - Some(proto::data_value::DataType::Timestamp(ts)) => { - timestamp_to_date(ts.epoch_ms) - } - Some(proto::data_value::DataType::Text(s)) => parse_iso_date(s), - Some(proto::data_value::DataType::NullValue(_)) => 0, - None => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Timestamp|Text", - observed: "Unset", - }); - } - other => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Number|Timestamp|Text", - observed: observed_name(other), - }); - } - }, - None => 0, - }; - - // type: Text "open"/"full_close"/"early_close"/"weekend"; Number - // kept as a future-proofing path. `NullValue` → (0, 0). Unset - // oneof is a wire anomaly → TypeMismatch. - let (is_open, status) = match type_idx { - Some(i) => match cell_type(row, i)? { - Some(proto::data_value::DataType::Text(s)) => calendar_type_text(s), - Some(proto::data_value::DataType::Number(n)) => { - let n = *n as i32; - (i32::from(n != 0), n) - } - Some(proto::data_value::DataType::NullValue(_)) => (0, 0), - None => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Text|Number", - observed: "Unset", - }); - } - other => { - return Err(DecodeError::TypeMismatch { - column: i, - expected: "Text|Number", - observed: observed_name(other), - }); - } - }, - None => (0, 0), - }; - - let open_time = decode_calendar_time(row, open_idx)?; - let close_time = decode_calendar_time(row, close_idx)?; - - Ok(CalendarDay { - date, - is_open, - open_time, - close_time, - status, - }) - }) - .collect() -} - -/// Decode a calendar `open`/`close` column. `Text "HH:MM:SS"` → ms-of-day; -/// `Number` kept as future-proofing. `NullValue` / absent column → 0. An unset -/// oneof is a wire anomaly → [`DecodeError::TypeMismatch`]. 
-fn decode_calendar_time( - row: &proto::DataValueList, - idx: Option, -) -> Result { - let Some(i) = idx else { - return Ok(0); - }; - match cell_type(row, i)? { - Some(proto::data_value::DataType::Text(s)) => Ok(parse_time_text(s)), - Some(proto::data_value::DataType::Number(n)) => Ok(*n as i32), - Some(proto::data_value::DataType::NullValue(_)) => Ok(0), - None => Err(DecodeError::TypeMismatch { - column: i, - expected: "Text|Number", - observed: "Unset", - }), - other => Err(DecodeError::TypeMismatch { - column: i, - expected: "Text|Number", - observed: observed_name(other), - }), - } -} -#[cfg(test)] -mod tests { - use super::*; - - /// Build a DataValue containing a Number. - fn dv_number(n: i64) -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::Number(n)), - } - } - - /// Build a DataValue containing a Price. - fn dv_price(value: i32, r#type: i32) -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::Price(proto::Price { - value, - r#type, - })), - } - } - - /// Build a DataValue containing NullValue. - fn dv_null() -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::NullValue(0)), - } - } - - /// Build a DataValue containing a Timestamp. - fn dv_timestamp(epoch_ms: u64) -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::Timestamp( - proto::ZonedDateTime { epoch_ms, zone: 0 }, - )), - } - } - - /// Build a DataValue with no data_type set (missing). 
- fn dv_missing() -> proto::DataValue { - proto::DataValue { data_type: None } - } - - fn row_of(values: Vec) -> proto::DataValueList { - proto::DataValueList { values } - } - - #[test] - fn row_number_returns_value_for_number_cell() { - let row = row_of(vec![dv_number(42)]); - assert_eq!(row_number(&row, 0).unwrap(), Some(42)); - } - - #[test] - fn row_number_returns_none_for_null_cell() { - let row = row_of(vec![dv_null()]); - assert_eq!(row_number(&row, 0).unwrap(), None); - } - - #[test] - fn row_number_errors_on_unset_cell() { - // A DataValue with the oneof unset is a wire-protocol anomaly. - // Java's `PojoMessageUtils.convert` hits the default arm for - // `DATATYPE_NOT_SET` and throws `IllegalArgumentException`; we - // surface it as `TypeMismatch { observed: "Unset" }`. - let row = row_of(vec![dv_missing()]); - assert_eq!( - row_number(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Number|Timestamp", - observed: "Unset", - }) - ); - } - - #[test] - fn row_number_errors_on_out_of_bounds() { - let row = row_of(vec![]); - assert_eq!( - row_number(&row, 5), - Err(DecodeError::MissingCell { column: 5 }) - ); - } - - #[test] - fn row_number_errors_on_text_cell() { - let row = row_of(vec![dv_text("oops")]); - assert_eq!( - row_number(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Number|Timestamp", - observed: "Text", - }) - ); - } - - #[test] - fn row_number_errors_on_price_cell() { - let row = row_of(vec![dv_price(12345, 10)]); - assert_eq!( - row_number(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Number|Timestamp", - observed: "Price", - }) - ); - } - - #[test] - fn row_number_accepts_timestamp_for_time_columns() { - // v3 MDDS sends `ms_of_day` as a Timestamp. 
- let epoch_ms: u64 = 1_775_050_200_000; // 2026-04-01 09:30 ET - let row = row_of(vec![dv_timestamp(epoch_ms)]); - assert_eq!(row_number(&row, 0).unwrap(), Some(34_200_000)); - } - - #[test] - fn row_text_errors_on_number_cell() { - let row = row_of(vec![dv_number(42)]); - assert_eq!( - row_text(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Text", - observed: "Number", - }) - ); - } - - #[test] - fn row_price_f64_accepts_number_cell() { - // Documented v3 MDDS behavior: f64 fields may arrive as plain Number. - let row = row_of(vec![dv_number(1_500_000)]); - assert_eq!(row_price_f64(&row, 0).unwrap(), Some(1_500_000.0)); - } - - #[test] - fn row_price_value_returns_value_for_price_cell() { - let row = row_of(vec![dv_price(12345, 10)]); - assert_eq!(row_price_value(&row, 0).unwrap(), Some(12345)); - } - - #[test] - fn row_price_value_returns_none_for_null_cell() { - let row = row_of(vec![dv_null()]); - assert_eq!(row_price_value(&row, 0).unwrap(), None); - } - - #[test] - fn row_price_type_returns_type_for_price_cell() { - let row = row_of(vec![dv_price(12345, 10)]); - assert_eq!(row_price_type(&row, 0).unwrap(), Some(10)); - } - - #[test] - fn row_price_type_returns_none_for_null_cell() { - let row = row_of(vec![dv_null()]); - assert_eq!(row_price_type(&row, 0).unwrap(), None); - } - - #[test] - fn null_cells_dont_corrupt_trade_ticks() { - // Build a minimal DataTable with one row that has a NullValue in a field. - // Note: "price" header triggers Price-typed extraction, so we use a Price cell. 
- let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "sequence".into(), - "ext_condition1".into(), - "ext_condition2".into(), - "ext_condition3".into(), - "ext_condition4".into(), - "condition".into(), - "size".into(), - "exchange".into(), - "price".into(), - "condition_flags".into(), - "price_flags".into(), - "volume_type".into(), - "records_back".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34200000), // ms_of_day - dv_number(1), // sequence - dv_null(), // ext_condition1 = NullValue - dv_number(0), // ext_condition2 - dv_number(0), // ext_condition3 - dv_number(0), // ext_condition4 - dv_number(0), // condition - dv_number(100), // size - dv_number(4), // exchange - dv_price(15000, 10), // price (Price-typed because header is "price") - dv_number(0), // condition_flags - dv_number(0), // price_flags - dv_number(0), // volume_type - dv_number(0), // records_back - dv_number(20240301), // date - ])], - }; - - let ticks = parse_trade_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let tick = &ticks[0]; - assert_eq!(tick.ms_of_day, 34200000); - // NullValue should default to 0, not corrupt subsequent fields. - assert_eq!(tick.ext_condition1, 0); - assert_eq!(tick.size, 100); - assert!((tick.price - 15000.0).abs() < 1e-10); - assert_eq!(tick.date, 20240301); - } - - #[test] - fn extract_number_column_returns_none_for_null() { - let table = proto::DataTable { - headers: vec!["val".into()], - data_table: vec![ - row_of(vec![dv_number(10)]), - row_of(vec![dv_null()]), - row_of(vec![dv_number(30)]), - ], - }; - - let col = extract_number_column(&table, "val"); - assert_eq!(col, vec![Some(10), None, Some(30)]); - } - - #[test] - // Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. 
- #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] - fn timestamp_to_ms_of_day_edt() { - // 2026-04-01 09:30:00 ET (EDT, UTC-4) = 2026-04-01 13:30:00 UTC - // epoch_ms for 2026-04-01 13:30:00 UTC - let epoch_ms: u64 = 1_775_050_200_000; // Apr 1 2026, 13:30 UTC - let ms = super::timestamp_to_ms_of_day(epoch_ms); - assert_eq!(ms, 34_200_000, "9:30 AM ET in milliseconds"); - } - - #[test] - // Reason: ms_of_day fits in i32; epoch_ms is in valid market data range. - #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] - fn timestamp_to_ms_of_day_est() { - // 2026-01-15 09:30:00 ET (EST, UTC-5) = 2026-01-15 14:30:00 UTC - let epoch_ms: u64 = 1_768_487_400_000; - let ms = super::timestamp_to_ms_of_day(epoch_ms); - assert_eq!(ms, 34_200_000, "9:30 AM ET in milliseconds (winter)"); - } - - #[test] - fn timestamp_to_date_edt() { - let epoch_ms: u64 = 1_775_050_200_000; // Apr 1 2026, 13:30 UTC - let date = super::timestamp_to_date(epoch_ms); - assert_eq!(date, 20260401); - } - - #[test] - fn timestamp_to_date_est() { - let epoch_ms: u64 = 1_768_487_400_000; // Jan 15 2026, 14:30 UTC - let date = super::timestamp_to_date(epoch_ms); - assert_eq!(date, 20260115); - } - - #[test] - fn dst_transition_march_2026() { - // 2026 DST starts March 8 (second Sunday of March) - // Before: EST (UTC-5) at 06:59 UTC. After: EDT (UTC-4) at 07:01 UTC. - let before: u64 = 1_772_953_140_000; // Mar 8 2026, 06:59 UTC - assert_eq!(super::eastern_offset_ms(before), -5 * 3_600 * 1_000); - let after: u64 = 1_772_953_260_000; // Mar 8 2026, 07:01 UTC - assert_eq!(super::eastern_offset_ms(after), -4 * 3_600 * 1_000); - } - - #[test] - fn pre2007_dst_summer_uses_old_rules() { - // 2006: old rules apply (first Sunday April -> last Sunday October). - // 2006-07-15 18:00:00 UTC = 2006-07-15 14:00:00 EDT (summer, mid-July). - // This is well within DST under both old and new rules, so EDT (UTC-4). 
- let epoch_ms: u64 = 1_153_065_600_000; // Jul 15 2006, 18:00 UTC - assert_eq!( - super::eastern_offset_ms(epoch_ms), - -4 * 3_600 * 1_000, - "mid-July 2006 should be EDT under old DST rules" - ); - } - - #[test] - fn pre2007_est_before_april_dst_start() { - // 2006: old rules — DST starts first Sunday of April (April 2, 2006). - // 2006-02-15 15:00:00 UTC = 2006-02-15 10:00:00 EST (winter, mid-Feb). - // Under old rules, February is EST. - let epoch_ms: u64 = 1_140_015_600_000; // Feb 15 2006, 15:00 UTC - assert_eq!( - super::eastern_offset_ms(epoch_ms), - -5 * 3_600 * 1_000, - "mid-February 2006 should be EST under old DST rules" - ); - } - - /// Build a DataValue containing Text. - fn dv_text(s: &str) -> proto::DataValue { - proto::DataValue { - data_type: Some(proto::data_value::DataType::Text(s.to_string())), - } - } - - #[test] - fn parse_calendar_v3_holiday() { - // Simulate calendar_year response for a holiday (full_close). - let table = proto::DataTable { - headers: vec!["date".into(), "type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![ - dv_text("2025-01-01"), - dv_text("full_close"), - dv_null(), - dv_null(), - ])], - }; - - let days = parse_calendar_days_v3(&table).unwrap(); - assert_eq!(days.len(), 1); - let d = &days[0]; - assert_eq!(d.date, 20250101); - assert_eq!(d.is_open, 0); - assert_eq!(d.open_time, 0); - assert_eq!(d.close_time, 0); - assert_eq!(d.status, CALENDAR_STATUS_FULL_CLOSE); - } - - #[test] - fn parse_calendar_v3_open_day() { - // Simulate calendar_on_date response for a regular trading day. - // Note: on_date and open_today omit the "date" column. 
- let table = proto::DataTable { - headers: vec!["type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![ - dv_text("open"), - dv_text("09:30:00"), - dv_text("16:00:00"), - ])], - }; - - let days = parse_calendar_days_v3(&table).unwrap(); - assert_eq!(days.len(), 1); - let d = &days[0]; - assert_eq!(d.date, 0); // no date column - assert_eq!(d.is_open, 1); - assert_eq!(d.open_time, 34_200_000); // 9:30 AM = 9*3600+30*60 = 34200 seconds = 34200000 ms - assert_eq!(d.close_time, 57_600_000); // 4:00 PM = 16*3600 = 57600 seconds = 57600000 ms - assert_eq!(d.status, CALENDAR_STATUS_OPEN); - } - - #[test] - fn parse_calendar_v3_early_close() { - // Simulate an early close day (day after Thanksgiving). - let table = proto::DataTable { - headers: vec!["date".into(), "type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![ - dv_text("2025-11-28"), - dv_text("early_close"), - dv_text("09:30:00"), - dv_text("13:00:00"), - ])], - }; - - let days = parse_calendar_days_v3(&table).unwrap(); - assert_eq!(days.len(), 1); - let d = &days[0]; - assert_eq!(d.date, 20251128); - assert_eq!(d.is_open, 1); - assert_eq!(d.open_time, 34_200_000); - assert_eq!(d.close_time, 46_800_000); // 1:00 PM = 13*3600 = 46800 seconds = 46800000 ms - assert_eq!(d.status, CALENDAR_STATUS_EARLY_CLOSE); - } - - #[test] - fn parse_calendar_v3_weekend() { - let table = proto::DataTable { - headers: vec!["type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![dv_text("weekend"), dv_null(), dv_null()])], - }; - - let days = parse_calendar_days_v3(&table).unwrap(); - assert_eq!(days.len(), 1); - let d = &days[0]; - assert_eq!(d.is_open, 0); - assert_eq!(d.status, CALENDAR_STATUS_WEEKEND); - } - - #[test] - fn parse_time_text_valid() { - assert_eq!(super::parse_time_text("09:30:00"), 34_200_000); - assert_eq!(super::parse_time_text("16:00:00"), 57_600_000); - assert_eq!(super::parse_time_text("13:00:00"), 46_800_000); - 
assert_eq!(super::parse_time_text("00:00:00"), 0); - } - - #[test] - fn parse_time_text_invalid_returns_zero() { - assert_eq!(super::parse_time_text("invalid"), 0); - assert_eq!(super::parse_time_text(""), 0); - } - - #[test] - fn parse_eod_timestamp_aliases_decode_time_and_date_separately() { - // 2026-04-01 13:30:00 UTC = 2026-04-01 09:30:00 ET (EDT). - let epoch_ms: u64 = 1_775_050_200_000; - let table = proto::DataTable { - headers: vec![ - "timestamp".into(), - "timestamp2".into(), - "open".into(), - "close".into(), - ], - data_table: vec![row_of(vec![ - dv_timestamp(epoch_ms), - dv_timestamp(epoch_ms), - dv_number(15000), - dv_number(15100), - ])], - }; - - let ticks = parse_eod_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - assert_eq!(ticks[0].ms_of_day, 34_200_000); - assert_eq!(ticks[0].ms_of_day2, 34_200_000); - assert_eq!(ticks[0].date, 20260401); - assert!((ticks[0].open - 15000.0).abs() < 1e-10); - assert!((ticks[0].close - 15100.0).abs() < 1e-10); - } - - #[test] - fn row_number_i64_decodes_price_cells() { - // MDDS sends large integer fields as Price cells, not Number cells. - // Price encoding: price_type centered at 10. 
- // type=10 → value as-is, type=13 → value * 10^3, type=7 → value / 10^3 - // Example: Price { value: 3842, type: 19 } = 3842 * 10^9 = 3_842_000_000_000 - let row = row_of(vec![dv_price(3842, 19)]); - assert_eq!( - row_number_i64(&row, 0).unwrap(), - Some(3_842_000_000_000_i64) - ); - } - - #[test] - fn row_number_i64_still_decodes_number_cells() { - let row = row_of(vec![dv_number(999_999_999)]); - assert_eq!(row_number_i64(&row, 0).unwrap(), Some(999_999_999)); - } - - #[test] - fn row_number_i64_returns_none_for_null() { - let row = row_of(vec![dv_null()]); - assert_eq!(row_number_i64(&row, 0).unwrap(), None); - } - - #[test] - fn row_number_i64_errors_on_text_cell() { - let row = row_of(vec![dv_text("oops")]); - assert_eq!( - row_number_i64(&row, 0), - Err(DecodeError::TypeMismatch { - column: 0, - expected: "Number|Price", - observed: "Text", - }) - ); - } - - /// Pin a Price cell past `2^53` to the i64-native result for `type=17`. - #[test] - fn row_number_i64_price_cell_returns_bit_exact_i64() { - let row = row_of(vec![dv_price(1_073_741_823, 17)]); - let got = row_number_i64(&row, 0).unwrap().expect("Some"); - assert_eq!(got, 10_737_418_230_000_000_i64); - assert!(got > (1_i64 << 53)); - } - - /// `value == 0` decodes to 0 regardless of the exponent. Mathematically - /// the product is zero; the decoder must not reject a zero cell, even - /// when `price_type` is at the clamp boundary. - #[test] - fn row_number_i64_price_zero_value_short_circuits() { - let row = row_of(vec![dv_price(0, 19)]); - assert_eq!(row_number_i64(&row, 0), Ok(Some(0))); - } - - /// `row_number_i64` and `row_price_f64` must agree on the same wire - /// cell. With `type=19` (in-range) and `value=42`, `row_price_f64` - /// routes through `Price::new` which keeps `price_type=19`, and - /// `row_number_i64` produces the i64-native scale. Both should match. - /// Manual: 42 * 10^(19-10) = 42 * 10^9 = 42_000_000_000. 
- #[test] - fn row_number_i64_matches_row_price_f64_at_type_19() { - let row = row_of(vec![dv_price(42, 19)]); - let as_int = row_number_i64(&row, 0).unwrap().expect("Some"); - let as_float = row_price_f64(&row, 0).unwrap().expect("Some"); - assert_eq!(as_int, 42_000_000_000_i64); - assert!((as_float - 42_000_000_000.0_f64).abs() < 1.0); - } - - /// `price_type=20` is out-of-range; both decoders must clamp to 19 - /// (matching `Price::new`). A `type=20` cell and a `type=19` cell with - /// the same value must therefore decode to the same i64. - #[test] - fn row_number_i64_clamps_price_type_above_19() { - let row_clamped = row_of(vec![dv_price(7, 20)]); - let row_in_range = row_of(vec![dv_price(7, 19)]); - assert_eq!( - row_number_i64(&row_clamped, 0).unwrap(), - row_number_i64(&row_in_range, 0).unwrap(), - ); - // Pin the absolute value too: 7 * 10^9 = 7_000_000_000. - assert_eq!( - row_number_i64(&row_clamped, 0).unwrap(), - Some(7_000_000_000_i64) - ); - } - - /// Maximum scale-up under the clamped contract: `value=i32::MAX, - /// type=19` yields `i32::MAX * 10^9 = 2_147_483_647_000_000_000`, - /// which is below `i64::MAX = 9_223_372_036_854_775_807`. The product - /// must fit and decode bit-exact (no `TypeMismatch`). - #[test] - fn row_number_i64_max_in_range_price_fits_i64() { - let row = row_of(vec![dv_price(i32::MAX, 19)]); - assert_eq!( - row_number_i64(&row, 0).unwrap(), - Some(2_147_483_647_000_000_000_i64), - ); - } - - #[test] - fn parse_trade_ticks_propagates_type_mismatch() { - // A Text cell in an i32 column is a schema violation — the parser - // must surface it, not silently coerce to 0. - let table = proto::DataTable { - headers: vec!["ms_of_day".into(), "price".into()], - data_table: vec![row_of(vec![dv_text("not-a-number"), dv_price(15000, 10)])], - }; - let err = parse_trade_ticks(&table).unwrap_err(); - assert!( - matches!(err, DecodeError::TypeMismatch { .. 
}), - "expected TypeMismatch, got {err:?}" - ); - } - - // ─────────── Unset-oneof is an error at every strict decode site ─────────── - // - // A `DataValue` with its `data_type` oneof unset is a wire-protocol - // anomaly (Java's `PojoMessageUtils.convert` default arm throws - // `IllegalArgumentException`). The helpers `row_number` / `row_date` / - // etc. already surface it as `TypeMismatch { observed: "Unset" }`. These - // tests pin the same behaviour on the call-sites that used to coalesce - // `NullValue | None` to zero: `parse_option_contracts_v3`, - // `parse_calendar_days_v3`, the generator-emitted EOD helpers, and the - // generator-emitted contract-id injected `expiration` / `right` fields. - - #[test] - fn parse_option_contracts_v3_errors_on_unset_expiration() { - let table = proto::DataTable { - headers: vec!["root".into(), "expiration".into()], - data_table: vec![row_of(vec![dv_text("AAPL"), dv_missing()])], - }; - assert_eq!( - parse_option_contracts_v3(&table).unwrap_err(), - DecodeError::TypeMismatch { - column: 1, - expected: "Number|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_option_contracts_v3_errors_on_unset_right() { - let table = proto::DataTable { - headers: vec!["root".into(), "right".into()], - data_table: vec![row_of(vec![dv_text("AAPL"), dv_missing()])], - }; - assert_eq!( - parse_option_contracts_v3(&table).unwrap_err(), - DecodeError::TypeMismatch { - column: 1, - expected: "Number|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_calendar_days_v3_errors_on_unset_date() { - let table = proto::DataTable { - headers: vec!["date".into(), "type".into()], - data_table: vec![row_of(vec![dv_missing(), dv_text("open")])], - }; - assert_eq!( - parse_calendar_days_v3(&table).unwrap_err(), - DecodeError::TypeMismatch { - column: 0, - expected: "Number|Timestamp|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_calendar_days_v3_errors_on_unset_open_time() { - // `decode_calendar_time` is the 
helper covering both `open` and - // `close`; one test pins the shared path. - let table = proto::DataTable { - headers: vec!["type".into(), "open".into(), "close".into()], - data_table: vec![row_of(vec![ - dv_text("open"), - dv_missing(), - dv_text("16:00:00"), - ])], - }; - assert_eq!( - parse_calendar_days_v3(&table).unwrap_err(), - DecodeError::TypeMismatch { - column: 1, - expected: "Text|Number", - observed: "Unset", - } - ); - } - - #[test] - fn parse_eod_ticks_errors_on_unset_cell() { - // `parse_eod_ticks` is generator-emitted with the `eod_num` / - // `eod_date` / `eod_price` helpers; one test pins the shared path. - let table = proto::DataTable { - headers: vec!["timestamp".into(), "open".into()], - data_table: vec![row_of(vec![dv_missing(), dv_number(15000)])], - }; - let err = parse_eod_ticks(&table).unwrap_err(); - assert_eq!( - err, - DecodeError::TypeMismatch { - column: 0, - expected: "Number|Price|Timestamp", - observed: "Unset", - } - ); - } - - #[test] - fn parse_trade_ticks_errors_on_unset_injected_expiration() { - // `parse_trade_ticks` is generator-emitted with `contract_id = true`; - // an `expiration` header in the server payload triggers the injected - // `expiration` / `strike` / `right` decode. An unset cell there used - // to coalesce to 0; now it must fail loud. 
- let table = proto::DataTable { - headers: vec!["ms_of_day".into(), "price".into(), "expiration".into()], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15000, 10), - dv_missing(), - ])], - }; - let err = parse_trade_ticks(&table).unwrap_err(); - assert_eq!( - err, - DecodeError::TypeMismatch { - column: 2, - expected: "Number|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_trade_ticks_errors_on_unset_injected_right() { - let table = proto::DataTable { - headers: vec!["ms_of_day".into(), "price".into(), "right".into()], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15000, 10), - dv_missing(), - ])], - }; - let err = parse_trade_ticks(&table).unwrap_err(); - assert_eq!( - err, - DecodeError::TypeMismatch { - column: 2, - expected: "Number|Text", - observed: "Unset", - } - ); - } - - #[test] - fn parse_greeks_all_ticks_decodes_price_encoded_greeks() { - // Regression: an earlier strict decode rejected Price cells for Greek - // columns, but the v3 MDDS server sends Greeks as Price-encoded - // values (mirroring Java's `dataValue2Object` -> BigDecimal path). - // Live run #24520486541 on main surfaced this as - // "column 13: expected Number, got Price" - // on `option_snapshot_greeks_first_order::bulk_chain` and peers. - // Pin Price-cell decoding for both IV and a Greek so a future - // strict-Number tightening can't re-break it silently. - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_volatility".into(), - "delta".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - // IV = 0.1234 encoded with price_type = 6 (value * 10^-4). - dv_price(1234, 6), - // Delta = 0.5 encoded with price_type = 9 (value * 10^-1). 
- dv_price(5, 9), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - assert!((ticks[0].implied_volatility - 0.1234).abs() < 1e-10); - assert!((ticks[0].delta - 0.5).abs() < 1e-10); - } - - /// Pin the `implied_vol → implied_volatility` and `underlying_timestamp - /// → underlying_ms_of_day` aliases in `HEADER_ALIASES` (decode.rs:82) by - /// decoding a wire payload whose headers use ONLY the v3 server-side - /// names. If either alias entry is dropped or mistyped, the matching - /// schema field silently zero-defaults via `opt_float` / `opt_number` - /// (see the generated `parse_greeks_all_ticks` body), and this test - /// catches that regression. - /// - /// The companion fixture-driven test - /// `crates/thetadatadx/tests/test_decode_captures.rs::greeks_all_*` - /// can't catch a broken `implied_vol` alias on its own because the - /// captured fixture's `first_row_implied_volatility` is `0.0` — a - /// missing alias and a real zero IV are indistinguishable there. - #[test] - fn parse_greeks_all_ticks_resolves_implied_vol_and_underlying_timestamp_aliases() { - // Headers use the v3 server-side names. Schema names - // (`implied_volatility`, `underlying_ms_of_day`) are deliberately - // absent so the parser MUST resolve them via `HEADER_ALIASES`. - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_vol".into(), - "underlying_timestamp".into(), - ], - // IV = 0.42 encoded with price_type = 6 (value * 10^-4). - // underlying_timestamp epoch_ms 1_775_050_200_000 corresponds - // to 2026-04-01 09:30 ET, which `row_number` converts to - // ms-of-day 34_200_000 (matching `first_row_underlying_ms_of_day` - // in the option_history_greeks_all fixture meta). 
- data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(4200, 6), - dv_timestamp(1_775_050_200_000), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - // Non-zero IV proves the `implied_vol` alias resolved; a broken - // alias would produce 0.0 from the `opt_float(None)` arm. - assert!( - (t.implied_volatility - 0.42).abs() < 1e-9, - "implied_vol alias did not resolve: got {}", - t.implied_volatility, - ); - // Non-zero ms-of-day proves the `underlying_timestamp` alias - // resolved; a broken alias would produce 0 from `opt_number(None)`. - assert_eq!(t.underlying_ms_of_day, 34_200_000); - } - - #[test] - fn parse_greeks_all_ticks_still_decodes_number_cells() { - // Companion to the Price-cell regression test: Number cells must - // still decode, matching Java's dispatch-on-wire-type semantics. - let table = proto::DataTable { - headers: vec!["ms_of_day".into(), "implied_volatility".into()], - data_table: vec![row_of(vec![dv_number(34_200_000), dv_number(0)])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - assert!(ticks[0].implied_volatility.abs() < 1e-10); - } - - /// Vendor wire shape for `option_*_greeks_first_order`: only the seven - /// first-order columns plus IV pair — vanna/charm/vomma/veta/speed/ - /// zomma/color/ultima/d1/d2/dual_delta/dual_gamma/vera are absent and - /// must default to `0.0` without surfacing any `find_header` warn. - /// Column layout pinned to `scripts/upstream_openapi.yaml` schema - /// `items_option_snapshot_greeks_first_order`. 
- #[test] - fn parse_greeks_all_ticks_decodes_first_order_subset_with_silent_gaps() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_volatility".into(), - "delta".into(), - "theta".into(), - "vega".into(), - "rho".into(), - "epsilon".into(), - "lambda".into(), - "iv_error".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(5023, 6), // delta = 0.5023 - dv_price(-114, 6), // theta = -0.0114 - dv_price(8741, 6), // vega = 0.8741 - dv_price(13598, 6), // rho = 1.3598 - dv_price(-1976, 6), // epsilon = -0.1976 - dv_price(32052, 6), // lambda = 3.2052 - dv_price(-3, 6), // iv_error = -3 / 10^4 = -0.0003 - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - // Wire-present columns: bit-exact against the input. - // `dv_price(value, 6)` decodes as `value * 10^(6-10) = value / 10000` - // (see `tdbe::types::price::Price::to_f64`). - assert_eq!(t.ms_of_day, 34_200_000); - assert!((t.implied_volatility - 0.2142).abs() < 1e-9); - assert!((t.delta - 0.5023).abs() < 1e-9); - assert!((t.theta - -0.0114).abs() < 1e-9); - assert!((t.vega - 0.8741).abs() < 1e-9); - assert!((t.rho - 1.3598).abs() < 1e-9); - assert!((t.epsilon - -0.1976).abs() < 1e-9); - assert!((t.lambda - 3.2052).abs() < 1e-9); - assert!((t.iv_error - -0.0003).abs() < 1e-9); - assert_eq!(t.date, 20_240_614); - - // Wire-absent columns: zero-defaulted. These are the columns the - // server does NOT publish for `_greeks_first_order` — `find_header` - // returning `None` for each must NOT yield an error and must NOT - // warn (the pre-fix behaviour spammed eight warn lines per row). 
- assert_eq!(t.gamma, 0.0); - assert_eq!(t.vanna, 0.0); - assert_eq!(t.charm, 0.0); - assert_eq!(t.vomma, 0.0); - assert_eq!(t.veta, 0.0); - assert_eq!(t.speed, 0.0); - assert_eq!(t.zomma, 0.0); - assert_eq!(t.color, 0.0); - assert_eq!(t.ultima, 0.0); - assert_eq!(t.d1, 0.0); - assert_eq!(t.d2, 0.0); - assert_eq!(t.dual_delta, 0.0); - assert_eq!(t.dual_gamma, 0.0); - assert_eq!(t.vera, 0.0); - } - - /// Vendor wire shape for `option_*_greeks_second_order`: gamma / vanna - /// / charm / vomma / veta plus IV pair. Column layout pinned to - /// upstream OpenAPI schema `items_option_snapshot_greeks_second_order`. - #[test] - fn parse_greeks_all_ticks_decodes_second_order_subset_with_silent_gaps() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_volatility".into(), - "gamma".into(), - "vanna".into(), - "charm".into(), - "vomma".into(), - "veta".into(), - "iv_error".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(120, 6), // gamma = 0.012 - dv_price(45, 6), // vanna = 0.0045 - dv_price(-12, 6), // charm = -0.0012 - dv_price(900, 6), // vomma = 0.09 - dv_price(-3, 6), // veta = -0.0003 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert!((t.gamma - 0.012).abs() < 1e-9); - assert!((t.vanna - 0.0045).abs() < 1e-9); - assert!((t.charm - -0.0012).abs() < 1e-9); - assert!((t.vomma - 0.09).abs() < 1e-9); - assert!((t.veta - -0.0003).abs() < 1e-9); - - // First-order, third-order, and `_all`-only columns are absent - // on the wire and default to 0.0. - assert_eq!(t.delta, 0.0); - assert_eq!(t.speed, 0.0); - assert_eq!(t.zomma, 0.0); - assert_eq!(t.d1, 0.0); - assert_eq!(t.vera, 0.0); - } - - /// Vendor wire shape for `option_*_greeks_third_order`: speed / zomma / - /// color / ultima plus IV pair. 
This is the exact endpoint the Issue - /// #472 reporter was hitting — `option_snapshot_greeks_third_order` - /// previously emitted eight warn lines per row for the absent - /// first-order / second-order / `_all`-only columns. The test pins the - /// silent-gap behaviour so a future regression of `find_header` back - /// to `tracing::warn!` would surface here as a behavioural change. - /// Column layout pinned to upstream OpenAPI schema - /// `items_option_snapshot_greeks_third_order` (notably `vera` is NOT - /// in the third-order subset; it only ships in `_greeks_all`). - #[test] - fn parse_greeks_all_ticks_decodes_third_order_subset_with_silent_gaps() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "implied_volatility".into(), - "speed".into(), - "zomma".into(), - "color".into(), - "ultima".into(), - "iv_error".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(7, 6), // speed = 0.0007 - dv_price(15, 6), // zomma = 0.0015 - dv_price(-2, 6), // color = -0.0002 - dv_price(33, 6), // ultima = 0.0033 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_all_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert!((t.speed - 0.0007).abs() < 1e-9); - assert!((t.zomma - 0.0015).abs() < 1e-9); - assert!((t.color - -0.0002).abs() < 1e-9); - assert!((t.ultima - 0.0033).abs() < 1e-9); - - // Vera is NOT a third-order column on the wire even though the - // generic `GreeksTick` struct carries the field. It must default - // to 0.0 here without warning. - assert_eq!(t.vera, 0.0); - // First-order and second-order columns also absent. 
- assert_eq!(t.delta, 0.0); - assert_eq!(t.gamma, 0.0); - assert_eq!(t.vanna, 0.0); - assert_eq!(t.d1, 0.0); - assert_eq!(t.dual_gamma, 0.0); - } - - /// `parse_greeks_first_order_ticks` against the column subset the - /// vendor publishes for `option_*_greeks_first_order` -- pinned to - /// `items_option_snapshot_greeks_first_order` in the upstream OpenAPI. - /// Asserts every column the parser fills decodes to the exact value - /// from the input row, and that the underlying-snapshot pair is - /// populated (the column subset is what differs from `_greeks_all`, - /// not the underlying tail). - #[test] - fn parse_greeks_first_order_ticks_decodes_first_order_subset() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "bid".into(), - "ask".into(), - "delta".into(), - "theta".into(), - "vega".into(), - "rho".into(), - "epsilon".into(), - "lambda".into(), - "implied_volatility".into(), - "iv_error".into(), - "underlying_ms_of_day".into(), - "underlying_price".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15022, 6), // bid = 1.5022 - dv_price(15041, 6), // ask = 1.5041 - dv_price(5023, 6), // delta = 0.5023 - dv_price(-114, 6), // theta = -0.0114 - dv_price(8741, 6), // vega = 0.8741 - dv_price(13598, 6), // rho = 1.3598 - dv_price(-1976, 6), // epsilon = -0.1976 - dv_price(32052, 6), // lambda = 3.2052 - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(34_200_001), - dv_price(580025, 6), // underlying_price = 58.0025 - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_first_order_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert_eq!(t.ms_of_day, 34_200_000); - assert!((t.bid - 1.5022).abs() < 1e-9); - assert!((t.ask - 1.5041).abs() < 1e-9); - assert!((t.delta - 0.5023).abs() < 1e-9); - assert!((t.theta - -0.0114).abs() < 1e-9); - assert!((t.vega - 0.8741).abs() < 1e-9); - assert!((t.rho - 
1.3598).abs() < 1e-9); - assert!((t.epsilon - -0.1976).abs() < 1e-9); - assert!((t.lambda - 3.2052).abs() < 1e-9); - assert!((t.implied_volatility - 0.2142).abs() < 1e-9); - assert!((t.iv_error - -0.0003).abs() < 1e-9); - assert_eq!(t.underlying_ms_of_day, 34_200_001); - assert!((t.underlying_price - 58.0025).abs() < 1e-9); - assert_eq!(t.date, 20_240_614); - } - - /// `parse_greeks_second_order_ticks` against the column subset the - /// vendor publishes for `option_*_greeks_second_order` -- pinned to - /// `items_option_snapshot_greeks_second_order` in the upstream - /// OpenAPI. Second-order Greeks: gamma / vanna / charm / vomma / - /// veta plus the IV pair and the bid/ask quote pair. - #[test] - fn parse_greeks_second_order_ticks_decodes_second_order_subset() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "bid".into(), - "ask".into(), - "gamma".into(), - "vanna".into(), - "charm".into(), - "vomma".into(), - "veta".into(), - "implied_volatility".into(), - "iv_error".into(), - "underlying_ms_of_day".into(), - "underlying_price".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15022, 6), // bid = 1.5022 - dv_price(15041, 6), // ask = 1.5041 - dv_price(120, 6), // gamma = 0.012 - dv_price(45, 6), // vanna = 0.0045 - dv_price(-12, 6), // charm = -0.0012 - dv_price(900, 6), // vomma = 0.09 - dv_price(-3, 6), // veta = -0.0003 - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(34_200_001), - dv_price(580025, 6), - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_second_order_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert_eq!(t.ms_of_day, 34_200_000); - assert!((t.bid - 1.5022).abs() < 1e-9); - assert!((t.ask - 1.5041).abs() < 1e-9); - assert!((t.gamma - 0.012).abs() < 1e-9); - assert!((t.vanna - 0.0045).abs() < 1e-9); - assert!((t.charm - -0.0012).abs() < 1e-9); - assert!((t.vomma - 
0.09).abs() < 1e-9); - assert!((t.veta - -0.0003).abs() < 1e-9); - assert!((t.implied_volatility - 0.2142).abs() < 1e-9); - assert!((t.iv_error - -0.0003).abs() < 1e-9); - assert_eq!(t.underlying_ms_of_day, 34_200_001); - assert!((t.underlying_price - 58.0025).abs() < 1e-9); - assert_eq!(t.date, 20_240_614); - } - - /// `parse_greeks_third_order_ticks` against the column subset the - /// vendor publishes for `option_*_greeks_third_order` -- pinned to - /// `items_option_snapshot_greeks_third_order` in the upstream - /// OpenAPI. Third-order Greeks: speed / zomma / color / ultima plus - /// the IV pair and the bid/ask quote pair. Notably the wire schema - /// does NOT publish `vera`; the struct does not carry it either. - #[test] - fn parse_greeks_third_order_ticks_decodes_third_order_subset() { - let table = proto::DataTable { - headers: vec![ - "ms_of_day".into(), - "bid".into(), - "ask".into(), - "speed".into(), - "zomma".into(), - "color".into(), - "ultima".into(), - "implied_volatility".into(), - "iv_error".into(), - "underlying_ms_of_day".into(), - "underlying_price".into(), - "date".into(), - ], - data_table: vec![row_of(vec![ - dv_number(34_200_000), - dv_price(15022, 6), // bid = 1.5022 - dv_price(15041, 6), // ask = 1.5041 - dv_price(7, 6), // speed = 0.0007 - dv_price(15, 6), // zomma = 0.0015 - dv_price(-2, 6), // color = -0.0002 - dv_price(33, 6), // ultima = 0.0033 - dv_price(2142, 6), // implied_volatility = 0.2142 - dv_price(-3, 6), // iv_error = -0.0003 - dv_number(34_200_001), - dv_price(580025, 6), - dv_number(20_240_614), - ])], - }; - let ticks = parse_greeks_third_order_ticks(&table).unwrap(); - assert_eq!(ticks.len(), 1); - let t = &ticks[0]; - - assert_eq!(t.ms_of_day, 34_200_000); - assert!((t.bid - 1.5022).abs() < 1e-9); - assert!((t.ask - 1.5041).abs() < 1e-9); - assert!((t.speed - 0.0007).abs() < 1e-9); - assert!((t.zomma - 0.0015).abs() < 1e-9); - assert!((t.color - -0.0002).abs() < 1e-9); - assert!((t.ultima - 0.0033).abs() < 1e-9); - 
assert!((t.implied_volatility - 0.2142).abs() < 1e-9); - assert!((t.iv_error - -0.0003).abs() < 1e-9); - assert_eq!(t.underlying_ms_of_day, 34_200_001); - assert!((t.underlying_price - 58.0025).abs() < 1e-9); - assert_eq!(t.date, 20_240_614); - } -} diff --git a/crates/thetadatadx/src/lib.rs b/crates/thetadatadx/src/lib.rs index f39eb7bf..a2ee7959 100644 --- a/crates/thetadatadx/src/lib.rs +++ b/crates/thetadatadx/src/lib.rs @@ -93,7 +93,6 @@ pub mod auth; pub mod config; -pub mod decode; pub mod endpoint; pub mod error; pub mod flatfiles; @@ -115,6 +114,12 @@ mod macros; pub mod mdds; +// `decode` is re-exported from `mdds::decode` to preserve the public surface +// (`thetadatadx::decode::*`). Wave 2 split the original decode.rs god-file +// into `mdds/decode/{error, headers, transport, extract, cell, v3}`; the +// re-export keeps existing consumer paths unchanged. +pub use mdds::decode; + /// Generated protobuf types from `mdds.proto` (package `BetaEndpoints`). #[allow(clippy::pedantic)] pub mod proto { diff --git a/crates/thetadatadx/src/mdds/decode/cell.rs b/crates/thetadatadx/src/mdds/decode/cell.rs new file mode 100644 index 00000000..41cc01c2 --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/cell.rs @@ -0,0 +1,304 @@ +//! Per-cell strict decoders (`row_*`) and the generated parser surface. +//! +//! Each `row_*` function dispatches on the cell's wire type rather than +//! coalescing silently — wire-protocol anomalies (`DataValue` with the +//! `data_type` oneof unset) surface as +//! [`DecodeError::TypeMismatch { observed: "Unset" }`] rather than collapsing +//! to a default value. +//! +//! The macro-generated `decode_generated` module (assembled by `build.rs` from +//! `tick_schema.toml`) is included from this module; its emitted parser +//! functions reference `crate::decode::*` for the cross-cutting helpers and +//! `tdbe::time::*` for Eastern-time conversion. 
+ +use super::error::observed_name; +use super::error::DecodeError; +use super::headers::find_header; +use super::v3::parse_iso_date; +use crate::proto; +use tdbe::types::tick::{ + CalendarDay, EodTick, GreeksAllTick, GreeksFirstOrderTick, GreeksSecondOrderTick, + GreeksThirdOrderTick, InterestRateTick, IvTick, MarketValueTick, OhlcTick, OpenInterestTick, + OptionContract, PriceTick, QuoteTick, TradeQuoteTick, TradeTick, +}; + +/// Extract a date (YYYYMMDD) from a `Number` or `Timestamp` cell, strictly. +/// +/// Used by generated parsers when the `date` field maps to a `timestamp` column. +/// `Number` carries the date already in YYYYMMDD form; `Timestamp` is converted +/// to an Eastern-Time YYYYMMDD integer. `NullValue` yields `Ok(None)`; any +/// other type yields `Err(TypeMismatch)`. +/// +/// # Errors +/// +/// Returns [`DecodeError::TypeMismatch`] if the cell is neither a `Number`, +/// `Timestamp`, nor `NullValue` — including the case where the `DataValue` +/// arrived with its `data_type` oneof unset (`observed: "Unset"`), which is a +/// wire-protocol anomaly we fail loud on. Returns [`DecodeError::MissingCell`] +/// only when the row has fewer cells than `idx` (index out of bounds). +// Reason: number values from protobuf fit in i32 for date/integer fields. 
+#[allow(clippy::cast_possible_truncation)] +pub(crate) fn row_date(row: &proto::DataValueList, idx: usize) -> Result<Option<i32>, DecodeError> { + let Some(dv) = row.values.get(idx) else { + return Err(DecodeError::MissingCell { column: idx }); + }; + match dv.data_type.as_ref() { + Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as i32)), + Some(proto::data_value::DataType::Timestamp(ts)) => { + Ok(Some(tdbe::time::timestamp_to_date(ts.epoch_ms))) + } + Some(proto::data_value::DataType::NullValue(_)) => Ok(None), + other => Err(DecodeError::TypeMismatch { + column: idx, + expected: "Number|Timestamp", + observed: observed_name(other), + }), + } +} + +/// Decode an `i32`-valued cell with Java-matching strict semantics. +/// +/// Accepts: +/// - `Number(n)` → `Ok(Some(n as i32))`. +/// - `Timestamp(ts)` → `Ok(Some(ms_of_day))` — v3 MDDS sends time columns as +/// proto `Timestamp`; the parser expects milliseconds-of-day in Eastern Time. +/// - `NullValue` → `Ok(None)`, matching Java `null` return. +/// +/// Any other variant produces [`DecodeError::TypeMismatch`], including the +/// case where the `DataValue` arrived with its `data_type` oneof unset +/// (`observed: "Unset"`) — a wire anomaly we fail loud on. A row shorter than +/// `idx` (index out of bounds) produces [`DecodeError::MissingCell`]. +/// +/// # Errors +/// +/// See variant list above. +// Reason: protocol-defined integer widths from Java FPSS specification. 
+#[allow(clippy::cast_possible_truncation)] +pub(crate) fn row_number( + row: &proto::DataValueList, + idx: usize, +) -> Result, DecodeError> { + let Some(dv) = row.values.get(idx) else { + return Err(DecodeError::MissingCell { column: idx }); + }; + match dv.data_type.as_ref() { + Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as i32)), + Some(proto::data_value::DataType::Timestamp(ts)) => { + Ok(Some(tdbe::time::timestamp_to_ms_of_day(ts.epoch_ms))) + } + Some(proto::data_value::DataType::NullValue(_)) => Ok(None), + other => Err(DecodeError::TypeMismatch { + column: idx, + expected: "Number|Timestamp", + observed: observed_name(other), + }), + } +} + +/// Extract raw price value from a `Price` cell (test-only helper). +/// +/// `Price(p)` → `Ok(Some(p.value))`; `NullValue` → `Ok(None)`; other types +/// error. Missing cell errors. +/// +/// # Errors +/// +/// See [`row_number`]. +#[cfg(test)] +#[allow(clippy::cast_possible_truncation)] +pub(crate) fn row_price_value( + row: &proto::DataValueList, + idx: usize, +) -> Result, DecodeError> { + let Some(dv) = row.values.get(idx) else { + return Err(DecodeError::MissingCell { column: idx }); + }; + match dv.data_type.as_ref() { + Some(proto::data_value::DataType::Price(p)) => Ok(Some(p.value)), + Some(proto::data_value::DataType::NullValue(_)) => Ok(None), + other => Err(DecodeError::TypeMismatch { + column: idx, + expected: "Price", + observed: observed_name(other), + }), + } +} + +/// Extract raw price type from a `Price` cell (test-only helper). +/// +/// # Errors +/// +/// See [`row_price_value`]. 
+#[cfg(test)] +#[allow(clippy::cast_possible_truncation)] +pub(crate) fn row_price_type( + row: &proto::DataValueList, + idx: usize, +) -> Result, DecodeError> { + let Some(dv) = row.values.get(idx) else { + return Err(DecodeError::MissingCell { column: idx }); + }; + match dv.data_type.as_ref() { + Some(proto::data_value::DataType::Price(p)) => Ok(Some(p.r#type)), + Some(proto::data_value::DataType::NullValue(_)) => Ok(None), + other => Err(DecodeError::TypeMismatch { + column: idx, + expected: "Price", + observed: observed_name(other), + }), + } +} + +/// Decode a price-valued cell to `f64`, using the cell's own `price_type`. +/// +/// Accepts both `Price` (the schema type) and `Number` — v3 MDDS occasionally +/// sends whole-dollar quantities as plain `Number` cells where the schema +/// would otherwise expect `Price`. `NullValue` returns `Ok(None)`. +/// +/// # Errors +/// +/// Errors on any other cell type or missing cell. +// Reason: protocol-defined integer widths from Java FPSS specification. +#[allow(clippy::cast_possible_truncation)] +pub(crate) fn row_price_f64( + row: &proto::DataValueList, + idx: usize, +) -> Result, DecodeError> { + let Some(dv) = row.values.get(idx) else { + return Err(DecodeError::MissingCell { column: idx }); + }; + match dv.data_type.as_ref() { + Some(proto::data_value::DataType::Price(p)) => Ok(Some( + tdbe::types::price::Price::new(p.value, p.r#type).to_f64(), + )), + Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n as f64)), + Some(proto::data_value::DataType::NullValue(_)) => Ok(None), + other => Err(DecodeError::TypeMismatch { + column: idx, + expected: "Price|Number", + observed: observed_name(other), + }), + } +} + +/// Decode a text-valued cell. +/// +/// `Text(s)` → `Ok(Some(s))`, `NullValue` → `Ok(None)`. +/// +/// # Errors +/// +/// Errors on any other cell type or missing cell. 
+pub(crate) fn row_text( + row: &proto::DataValueList, + idx: usize, +) -> Result, DecodeError> { + let Some(dv) = row.values.get(idx) else { + return Err(DecodeError::MissingCell { column: idx }); + }; + match dv.data_type.as_ref() { + Some(proto::data_value::DataType::Text(s)) => Ok(Some(s.clone())), + Some(proto::data_value::DataType::NullValue(_)) => Ok(None), + other => Err(DecodeError::TypeMismatch { + column: idx, + expected: "Text", + observed: observed_name(other), + }), + } +} + +/// Decode an `i64`-valued cell. +/// +/// `Number(n)` → `Ok(Some(n))`; `Price(p)` → scaled with i64-native +/// arithmetic (no f64 hop), so values past `2^53` round-trip bit-exact; +/// `NullValue` → `Ok(None)`. +/// +/// Used by the generated parsers for schema columns typed `i64` — added +/// with the EodTick `volume`/`count` widening (where on high-volume +/// symbols the values exceed `i32::MAX`). +/// +/// `price_type` is clamped to `0..=19` to match +/// [`tdbe::types::price::Price::new`], so the same wire cell decodes +/// identically through this function and [`row_price_f64`]. +/// +/// # Errors +/// +/// Returns `DecodeError::TypeMismatch` for any other cell variant. Returns +/// `DecodeError::MissingCell` for an out-of-bounds column index. Under the +/// clamped `0..=19` price-type contract, scale-up cannot overflow `i64` +/// (max product is `i32::MAX * 10^9 ≈ 2.15e18`, well under `i64::MAX`). +pub(crate) fn row_number_i64( + row: &proto::DataValueList, + idx: usize, +) -> Result, DecodeError> { + let Some(dv) = row.values.get(idx) else { + return Err(DecodeError::MissingCell { column: idx }); + }; + match dv.data_type.as_ref() { + Some(proto::data_value::DataType::Number(n)) => Ok(Some(*n)), + Some(proto::data_value::DataType::Price(p)) => { + // Vendor convention: real_value = value * 10^(type - 10). + // Clamp `type` to 0..=19 to match `tdbe::Price::new`, so the + // same wire cell decodes identically through `row_price_f64` + // and `row_number_i64`. 
Positive exp scales up; negative exp + // scales down. v == 0 short-circuits to 0 so a zero price + // never trips the scale-up overflow guard. + let v = i64::from(p.value); + if v == 0 { + return Ok(Some(0)); + } + let price_type = p.r#type.clamp(0, 19); + let exp = price_type - 10; + // After clamping, exp ∈ [-10, 9]. Scale-up: i32::MAX * 10^9 + // ≈ 2.147e18 < i64::MAX (≈ 9.22e18), so checked_mul cannot + // overflow. checked_mul preserves the contract anyway. + let scaled = if exp >= 0 { + 10i64 + .checked_pow(exp.unsigned_abs()) + .and_then(|m| v.checked_mul(m)) + } else { + Some(v / 10i64.pow(exp.unsigned_abs())) + }; + match scaled { + Some(n) => Ok(Some(n)), + None => Err(DecodeError::TypeMismatch { + column: idx, + expected: "i64-fitting Price", + observed: "Price overflowing i64", + }), + } + } + Some(proto::data_value::DataType::NullValue(_)) => Ok(None), + other => Err(DecodeError::TypeMismatch { + column: idx, + expected: "Number|Price", + observed: observed_name(other), + }), + } +} + +/// Borrow the cell at `idx`, returning an error if the row is too short. +pub(crate) fn cell_type( + row: &proto::DataValueList, + idx: usize, +) -> Result, DecodeError> { + let Some(dv) = row.values.get(idx) else { + return Err(DecodeError::MissingCell { column: idx }); + }; + Ok(dv.data_type.as_ref()) +} + +// Generated code -- parser functions from tick_schema.toml by build.rs. +// +// The emitted parser bodies reference: +// * `crate::proto::*` for wire types +// * `crate::decode::{observed_name, parse_iso_date, ...}` for shared helpers +// * `tdbe::time::{timestamp_to_ms_of_day, timestamp_to_date}` for ET conversion +// +// All of these resolve through the re-exports in `crate::mdds::decode` (which +// `crate::decode` re-exports at the crate root) so the generator's path +// assumptions remain intact after the split. +#[allow(clippy::pedantic)] // Reason: auto-generated parser code, not under our control. 
+mod decode_generated { + use super::*; + include!(concat!(env!("OUT_DIR"), "/decode_generated.rs")); +} +pub use decode_generated::*; diff --git a/crates/thetadatadx/src/mdds/decode/error.rs b/crates/thetadatadx/src/mdds/decode/error.rs new file mode 100644 index 00000000..20459021 --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/error.rs @@ -0,0 +1,74 @@ +//! Per-cell decode errors and `DataType` variant naming for diagnostics. +//! +//! Mirrors the Java terminal's `IllegalArgumentException` path in +//! `PojoMessageUtils.convert`. Schema-drift guards in the generated parsers +//! raise [`DecodeError::MissingRequiredHeader`] when an upstream column is +//! absent, and the streaming accumulator raises +//! [`DecodeError::ChunkHeaderDrift`] when a mid-stream chunk's header set +//! diverges from the first chunk's schema. + +use crate::proto; +use thiserror::Error as ThisError; + +/// Per-cell decode failure. Produced by the `row_*` helpers when a cell does +/// not match the column's declared type, or when the requested column index is +/// past the end of the row. Mirrors the Java terminal's `IllegalArgumentException` +/// path in `PojoMessageUtils.convert`. +#[derive(Debug, ThisError, PartialEq, Eq)] +pub enum DecodeError { + /// Cell exists but its `DataType` variant does not match the declared + /// schema for this column. + #[error("column {column}: expected {expected}, got {observed}")] + TypeMismatch { + column: usize, + expected: &'static str, + observed: &'static str, + }, + /// Row has fewer cells than the requested column index. + #[error("column {column}: missing cell")] + MissingCell { column: usize }, + /// A required header (declared in `tick_schema.toml` under + /// `required = [...]`) is absent from a non-empty `DataTable`. 
Emitted by + /// the generated parsers when the server has added or renamed the column — + /// surfacing this as an error is the only way to prevent silent data loss + /// when the upstream schema drifts (see `HEADER_ALIASES` for known + /// synonyms). Empty `DataTable`s (no rows) still return `Ok(vec![])` + /// because "no trades today" is a legitimate outcome. + #[error( + "required column `{header}` missing from {rows}-row DataTable; \ + available headers: {available}" + )] + MissingRequiredHeader { + header: &'static str, + rows: usize, + available: String, + }, + /// A mid-stream gRPC chunk carries a header set that does not match the + /// header set established by the first chunk. The stream accumulator + /// used to silently retain the first header set and accumulate rows + /// from every chunk underneath it, which would transparently corrupt + /// a row set if the server's wire schema changed mid-response. This + /// variant surfaces the drift instead of hiding it. + #[error( + "chunk {chunk_index} headers drifted from first-chunk schema; \ + first: [{first}]; chunk: [{chunk}]" + )] + ChunkHeaderDrift { + chunk_index: usize, + first: String, + chunk: String, + }, +} + +/// Name the `DataType` variant for error messages. `None` is treated as a +/// missing `data_type` oneof (protobuf cell with no variant set). 
+pub(crate) fn observed_name(dt: Option<&proto::data_value::DataType>) -> &'static str { + match dt { + Some(proto::data_value::DataType::Number(_)) => "Number", + Some(proto::data_value::DataType::Text(_)) => "Text", + Some(proto::data_value::DataType::Price(_)) => "Price", + Some(proto::data_value::DataType::Timestamp(_)) => "Timestamp", + Some(proto::data_value::DataType::NullValue(_)) => "NullValue", + None => "Unset", + } +} diff --git a/crates/thetadatadx/src/mdds/decode/extract.rs b/crates/thetadatadx/src/mdds/decode/extract.rs new file mode 100644 index 00000000..c84cf9ae --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/extract.rs @@ -0,0 +1,79 @@ +//! Column-extraction helpers (Number / Text / Price) over a `DataTable`. +//! +//! These helpers return `Vec>` keyed by the column header. They +//! drive the macro-generated list endpoints in `crate::macros` and the +//! Polars / Arrow column projections. + +use crate::proto; + +/// Extract a column of i64 values from a `DataTable` by header name. +#[must_use] +pub fn extract_number_column(table: &proto::DataTable, header: &str) -> Vec> { + let Some(col_idx) = table.headers.iter().position(|h| h == header) else { + return vec![]; + }; + + table + .data_table + .iter() + .map(|row| { + row.values + .get(col_idx) + .and_then(|dv| dv.data_type.as_ref()) + .and_then(|dt| match dt { + proto::data_value::DataType::Number(n) => Some(*n), + _ => None, + }) + }) + .collect() +} + +/// Extract a column of string values from a `DataTable` by header name. 
+#[must_use] +pub fn extract_text_column(table: &proto::DataTable, header: &str) -> Vec> { + let Some(col_idx) = table.headers.iter().position(|h| h == header) else { + return vec![]; + }; + + table + .data_table + .iter() + .map(|row| { + row.values + .get(col_idx) + .and_then(|dv| dv.data_type.as_ref()) + .and_then(|dt| match dt { + proto::data_value::DataType::Text(s) => Some(s.clone()), + proto::data_value::DataType::Number(n) => Some(n.to_string()), + proto::data_value::DataType::Price(p) => { + Some(format!("{}", tdbe::Price::new(p.value, p.r#type).to_f64())) + } + _ => None, + }) + }) + .collect() +} + +/// Extract a column of Price values from a `DataTable` by header name. +#[must_use] +pub fn extract_price_column(table: &proto::DataTable, header: &str) -> Vec> { + let Some(col_idx) = table.headers.iter().position(|h| h == header) else { + return vec![]; + }; + + table + .data_table + .iter() + .map(|row| { + row.values + .get(col_idx) + .and_then(|dv| dv.data_type.as_ref()) + .and_then(|dt| match dt { + proto::data_value::DataType::Price(p) => { + Some(tdbe::Price::new(p.value, p.r#type)) + } + _ => None, + }) + }) + .collect() +} diff --git a/crates/thetadatadx/src/mdds/decode/headers.rs b/crates/thetadatadx/src/mdds/decode/headers.rs new file mode 100644 index 00000000..3ada8c64 --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/headers.rs @@ -0,0 +1,74 @@ +//! Header alias table and lookup helper. +//! +//! v3 MDDS uses different column names than the tick schema. [`HEADER_ALIASES`] +//! maps schema names to their v3 equivalents so generated and hand-written +//! parsers work with both the schema and v3 wire payloads. + +/// Header aliases: v3 MDDS uses different column names than the tick schema. +/// This maps schema names to their v3 equivalents so parsers work with both. +/// +/// Validated against a real v3 MDDS response capture (see +/// `tests/fixtures/captures/`). 
Each entry is `(schema_name, server_name)`: +/// `find_header("ms_of_day", h)` returns the index of the first matching +/// server column in `h`. +pub(crate) const HEADER_ALIASES: &[(&str, &str)] = &[ + // Generic time column: MDDS sends a proto `Timestamp`, the tick schema + // models it as an i32 ms-of-day. `row_number` handles the conversion. + ("ms_of_day", "timestamp"), + ("ms_of_day", "created"), + // Combined trade + quote responses split the two time columns into + // `trade_timestamp` (the trade side → `ms_of_day`) and `quote_timestamp` + // (the quote side → `quote_ms_of_day`). Without these aliases the + // `TradeQuoteTick` parser falls through the required-header guard and + // produces an empty Vec on ~1M-row responses (P11). + ("ms_of_day", "trade_timestamp"), + ("quote_ms_of_day", "quote_timestamp"), + ("ms_of_day2", "timestamp2"), + ("ms_of_day2", "last_trade"), + ("date", "timestamp"), + ("date", "created"), + ("date", "trade_timestamp"), + // option_list_contracts returns "symbol" where the schema says "root" + ("root", "symbol"), + // v3 uses "implied_vol" where the schema says "implied_volatility" + ("implied_volatility", "implied_vol"), + // The vendor's per-order Greeks endpoints (`option_*_greeks_*_order`) + // and the `_greeks_all` / `_greeks_eod` endpoints publish the + // underlying snapshot timestamp as `underlying_timestamp`. The tick + // schema models it as `underlying_ms_of_day` so the wire conversion + // (Timestamp -> ms-of-day) flows through the standard `row_number` + // path without a per-tick parser branch. + ("underlying_ms_of_day", "underlying_timestamp"), +]; + +/// Helper: find a column index by name, with alias fallback. +/// +/// The v3 MDDS server uses `timestamp` where the tick schema says `ms_of_day`. +/// This function checks the primary name first, then falls back to known aliases. 
+/// +/// Returns `None` silently when the header is absent — required-header +/// guards in the generated parsers surface a typed +/// [`crate::error::Error::MissingRequiredHeader`] for the must-have columns; +/// optional columns missing from a subset response (e.g. +/// `option_snapshot_greeks_third_order` returning only the third-order Greek +/// columns from the `GreeksTick` union schema) are by design. Header drift +/// can be observed at the `trace` level via `RUST_LOG=thetadatadx=trace`. +pub(crate) fn find_header(headers: &[&str], name: &str) -> Option { + // Try exact match first. + if let Some(pos) = headers.iter().position(|&s| s == name) { + return Some(pos); + } + // Try aliases. + for &(schema_name, server_name) in HEADER_ALIASES { + if name == schema_name { + if let Some(pos) = headers.iter().position(|&s| s == server_name) { + return Some(pos); + } + } + } + tracing::trace!( + header = name, + "column header not present in DataTable (optional or subset response)" + ); + None +} diff --git a/crates/thetadatadx/src/mdds/decode/mod.rs b/crates/thetadatadx/src/mdds/decode/mod.rs new file mode 100644 index 00000000..70560bd0 --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/mod.rs @@ -0,0 +1,47 @@ +//! `ResponseData` → `DataTable` → tick decoders. +//! +//! Wave 2 split of the original `crates/thetadatadx/src/decode.rs` god-file: +//! +//! | Submodule | Concern | +//! |--------------|---------------------------------------------------------------| +//! | [`error`] | [`DecodeError`] enum + `observed_name` diagnostic helper | +//! | [`headers`] | `HEADER_ALIASES` v3 ↔ schema map + `find_header` lookup | +//! | [`transport`]| `decompress_response` / `decode_data_table` zstd path | +//! | [`extract`] | `extract_{number,text,price}_column` column projections | +//! | [`cell`] | Per-cell strict decoders (`row_*`) + generated parser surface | +//! | [`v3`] | Hand-written v3 parsers (`parse_option_contracts_v3`, …) | +//! +//! 
Public API surface is preserved at `thetadatadx::decode::*` via the +//! crate-root re-export of this module. Eastern-time / DST primitives +//! previously living here have moved to [`tdbe::time`] and are reused by +//! the FPSS latency path. + +pub mod cell; +pub mod error; +pub mod extract; +pub mod headers; +pub mod transport; +pub mod v3; + +pub use error::DecodeError; +pub use extract::{extract_number_column, extract_price_column, extract_text_column}; +pub use transport::{decode_data_table, decompress_response}; +pub use v3::{ + parse_calendar_days_v3, parse_option_contracts_v3, CALENDAR_STATUS_EARLY_CLOSE, + CALENDAR_STATUS_FULL_CLOSE, CALENDAR_STATUS_OPEN, CALENDAR_STATUS_UNKNOWN, + CALENDAR_STATUS_WEEKEND, +}; + +// Re-export the macro-generated parser functions (`parse_trade_ticks`, +// `parse_eod_ticks`, etc.) at this module's top level so external consumers +// (sdks/python, benches) can keep using `thetadatadx::decode::parse_*`. +pub use cell::*; + +// `observed_name` is `pub(crate)` and intentionally not part of the public +// surface; it stays accessible as `crate::decode::observed_name` via this +// re-export so the generated parser code (emitted by `build.rs` from the +// templates in `build_support/ticks/templates/parser/`) still resolves it. +pub(crate) use error::observed_name; + +#[cfg(test)] +mod tests; diff --git a/crates/thetadatadx/src/mdds/decode/tests.rs b/crates/thetadatadx/src/mdds/decode/tests.rs new file mode 100644 index 00000000..b741e3c4 --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/tests.rs @@ -0,0 +1,1092 @@ +//! Tests for the row-cell decoders, column extractors, and v3 hand-written +//! parsers. Eastern-time / DST primitive tests live with their canonical +//! home in `tdbe::time`. 
+ +use super::cell::{ + row_number, row_number_i64, row_price_f64, row_price_type, row_price_value, row_text, +}; +use super::v3::{ + parse_calendar_days_v3, parse_iso_date, parse_option_contracts_v3, parse_time_text, +}; +use super::v3::{ + CALENDAR_STATUS_EARLY_CLOSE, CALENDAR_STATUS_FULL_CLOSE, CALENDAR_STATUS_OPEN, + CALENDAR_STATUS_WEEKEND, +}; +use super::{ + extract_number_column, parse_eod_ticks, parse_greeks_all_ticks, parse_greeks_first_order_ticks, + parse_greeks_second_order_ticks, parse_greeks_third_order_ticks, parse_trade_ticks, + DecodeError, +}; +use crate::proto; + +/// Build a DataValue containing a Number. +fn dv_number(n: i64) -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::Number(n)), + } +} + +/// Build a DataValue containing a Price. +fn dv_price(value: i32, r#type: i32) -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::Price(proto::Price { + value, + r#type, + })), + } +} + +/// Build a DataValue containing NullValue. +fn dv_null() -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::NullValue(0)), + } +} + +/// Build a DataValue containing a Timestamp. +fn dv_timestamp(epoch_ms: u64) -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::Timestamp( + proto::ZonedDateTime { epoch_ms, zone: 0 }, + )), + } +} + +/// Build a DataValue with no data_type set (missing). +fn dv_missing() -> proto::DataValue { + proto::DataValue { data_type: None } +} + +/// Build a DataValue containing Text. 
+fn dv_text(s: &str) -> proto::DataValue { + proto::DataValue { + data_type: Some(proto::data_value::DataType::Text(s.to_string())), + } +} + +fn row_of(values: Vec) -> proto::DataValueList { + proto::DataValueList { values } +} + +#[test] +fn row_number_returns_value_for_number_cell() { + let row = row_of(vec![dv_number(42)]); + assert_eq!(row_number(&row, 0).unwrap(), Some(42)); +} + +#[test] +fn row_number_returns_none_for_null_cell() { + let row = row_of(vec![dv_null()]); + assert_eq!(row_number(&row, 0).unwrap(), None); +} + +#[test] +fn row_number_errors_on_unset_cell() { + // A DataValue with the oneof unset is a wire-protocol anomaly. + // Java's `PojoMessageUtils.convert` hits the default arm for + // `DATATYPE_NOT_SET` and throws `IllegalArgumentException`; we + // surface it as `TypeMismatch { observed: "Unset" }`. + let row = row_of(vec![dv_missing()]); + assert_eq!( + row_number(&row, 0), + Err(DecodeError::TypeMismatch { + column: 0, + expected: "Number|Timestamp", + observed: "Unset", + }) + ); +} + +#[test] +fn row_number_errors_on_out_of_bounds() { + let row = row_of(vec![]); + assert_eq!( + row_number(&row, 5), + Err(DecodeError::MissingCell { column: 5 }) + ); +} + +#[test] +fn row_number_errors_on_text_cell() { + let row = row_of(vec![dv_text("oops")]); + assert_eq!( + row_number(&row, 0), + Err(DecodeError::TypeMismatch { + column: 0, + expected: "Number|Timestamp", + observed: "Text", + }) + ); +} + +#[test] +fn row_number_errors_on_price_cell() { + let row = row_of(vec![dv_price(12345, 10)]); + assert_eq!( + row_number(&row, 0), + Err(DecodeError::TypeMismatch { + column: 0, + expected: "Number|Timestamp", + observed: "Price", + }) + ); +} + +#[test] +fn row_number_accepts_timestamp_for_time_columns() { + // v3 MDDS sends `ms_of_day` as a Timestamp. 
+ let epoch_ms: u64 = 1_775_050_200_000; // 2026-04-01 09:30 ET + let row = row_of(vec![dv_timestamp(epoch_ms)]); + assert_eq!(row_number(&row, 0).unwrap(), Some(34_200_000)); +} + +#[test] +fn row_text_errors_on_number_cell() { + let row = row_of(vec![dv_number(42)]); + assert_eq!( + row_text(&row, 0), + Err(DecodeError::TypeMismatch { + column: 0, + expected: "Text", + observed: "Number", + }) + ); +} + +#[test] +fn row_price_f64_accepts_number_cell() { + // Documented v3 MDDS behavior: f64 fields may arrive as plain Number. + let row = row_of(vec![dv_number(1_500_000)]); + assert_eq!(row_price_f64(&row, 0).unwrap(), Some(1_500_000.0)); +} + +#[test] +fn row_price_value_returns_value_for_price_cell() { + let row = row_of(vec![dv_price(12345, 10)]); + assert_eq!(row_price_value(&row, 0).unwrap(), Some(12345)); +} + +#[test] +fn row_price_value_returns_none_for_null_cell() { + let row = row_of(vec![dv_null()]); + assert_eq!(row_price_value(&row, 0).unwrap(), None); +} + +#[test] +fn row_price_type_returns_type_for_price_cell() { + let row = row_of(vec![dv_price(12345, 10)]); + assert_eq!(row_price_type(&row, 0).unwrap(), Some(10)); +} + +#[test] +fn row_price_type_returns_none_for_null_cell() { + let row = row_of(vec![dv_null()]); + assert_eq!(row_price_type(&row, 0).unwrap(), None); +} + +#[test] +fn null_cells_dont_corrupt_trade_ticks() { + // Build a minimal DataTable with one row that has a NullValue in a field. + // Note: "price" header triggers Price-typed extraction, so we use a Price cell. 
+ let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "sequence".into(), + "ext_condition1".into(), + "ext_condition2".into(), + "ext_condition3".into(), + "ext_condition4".into(), + "condition".into(), + "size".into(), + "exchange".into(), + "price".into(), + "condition_flags".into(), + "price_flags".into(), + "volume_type".into(), + "records_back".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34200000), // ms_of_day + dv_number(1), // sequence + dv_null(), // ext_condition1 = NullValue + dv_number(0), // ext_condition2 + dv_number(0), // ext_condition3 + dv_number(0), // ext_condition4 + dv_number(0), // condition + dv_number(100), // size + dv_number(4), // exchange + dv_price(15000, 10), // price (Price-typed because header is "price") + dv_number(0), // condition_flags + dv_number(0), // price_flags + dv_number(0), // volume_type + dv_number(0), // records_back + dv_number(20240301), // date + ])], + }; + + let ticks = parse_trade_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let tick = &ticks[0]; + assert_eq!(tick.ms_of_day, 34200000); + // NullValue should default to 0, not corrupt subsequent fields. + assert_eq!(tick.ext_condition1, 0); + assert_eq!(tick.size, 100); + assert!((tick.price - 15000.0).abs() < 1e-10); + assert_eq!(tick.date, 20240301); +} + +#[test] +fn extract_number_column_returns_none_for_null() { + let table = proto::DataTable { + headers: vec!["val".into()], + data_table: vec![ + row_of(vec![dv_number(10)]), + row_of(vec![dv_null()]), + row_of(vec![dv_number(30)]), + ], + }; + + let col = extract_number_column(&table, "val"); + assert_eq!(col, vec![Some(10), None, Some(30)]); +} + +#[test] +fn parse_eod_timestamp_aliases_decode_time_and_date_separately() { + // 2026-04-01 13:30:00 UTC = 2026-04-01 09:30:00 ET (EDT). 
+ let epoch_ms: u64 = 1_775_050_200_000; + let table = proto::DataTable { + headers: vec![ + "timestamp".into(), + "timestamp2".into(), + "open".into(), + "close".into(), + ], + data_table: vec![row_of(vec![ + dv_timestamp(epoch_ms), + dv_timestamp(epoch_ms), + dv_number(15000), + dv_number(15100), + ])], + }; + + let ticks = parse_eod_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + assert_eq!(ticks[0].ms_of_day, 34_200_000); + assert_eq!(ticks[0].ms_of_day2, 34_200_000); + assert_eq!(ticks[0].date, 20260401); + assert!((ticks[0].open - 15000.0).abs() < 1e-10); + assert!((ticks[0].close - 15100.0).abs() < 1e-10); +} + +#[test] +fn row_number_i64_decodes_price_cells() { + // MDDS sends large integer fields as Price cells, not Number cells. + // Price encoding: price_type centered at 10. + // type=10 → value as-is, type=13 → value * 10^3, type=7 → value / 10^3 + // Example: Price { value: 3842, type: 19 } = 3842 * 10^9 = 3_842_000_000_000 + let row = row_of(vec![dv_price(3842, 19)]); + assert_eq!( + row_number_i64(&row, 0).unwrap(), + Some(3_842_000_000_000_i64) + ); +} + +#[test] +fn row_number_i64_still_decodes_number_cells() { + let row = row_of(vec![dv_number(999_999_999)]); + assert_eq!(row_number_i64(&row, 0).unwrap(), Some(999_999_999)); +} + +#[test] +fn row_number_i64_returns_none_for_null() { + let row = row_of(vec![dv_null()]); + assert_eq!(row_number_i64(&row, 0).unwrap(), None); +} + +#[test] +fn row_number_i64_errors_on_text_cell() { + let row = row_of(vec![dv_text("oops")]); + assert_eq!( + row_number_i64(&row, 0), + Err(DecodeError::TypeMismatch { + column: 0, + expected: "Number|Price", + observed: "Text", + }) + ); +} + +/// Pin a Price cell past `2^53` to the i64-native result for `type=17`. 
+#[test] +fn row_number_i64_price_cell_returns_bit_exact_i64() { + let row = row_of(vec![dv_price(1_073_741_823, 17)]); + let got = row_number_i64(&row, 0).unwrap().expect("Some"); + assert_eq!(got, 10_737_418_230_000_000_i64); + assert!(got > (1_i64 << 53)); +} + +/// `value == 0` decodes to 0 regardless of the exponent. Mathematically +/// the product is zero; the decoder must not reject a zero cell, even +/// when `price_type` is at the clamp boundary. +#[test] +fn row_number_i64_price_zero_value_short_circuits() { + let row = row_of(vec![dv_price(0, 19)]); + assert_eq!(row_number_i64(&row, 0), Ok(Some(0))); +} + +/// `row_number_i64` and `row_price_f64` must agree on the same wire +/// cell. With `type=19` (in-range) and `value=42`, `row_price_f64` +/// routes through `Price::new` which keeps `price_type=19`, and +/// `row_number_i64` produces the i64-native scale. Both should match. +/// Manual: 42 * 10^(19-10) = 42 * 10^9 = 42_000_000_000. +#[test] +fn row_number_i64_matches_row_price_f64_at_type_19() { + let row = row_of(vec![dv_price(42, 19)]); + let as_int = row_number_i64(&row, 0).unwrap().expect("Some"); + let as_float = row_price_f64(&row, 0).unwrap().expect("Some"); + assert_eq!(as_int, 42_000_000_000_i64); + assert!((as_float - 42_000_000_000.0_f64).abs() < 1.0); +} + +/// `price_type=20` is out-of-range; both decoders must clamp to 19 +/// (matching `Price::new`). A `type=20` cell and a `type=19` cell with +/// the same value must therefore decode to the same i64. +#[test] +fn row_number_i64_clamps_price_type_above_19() { + let row_clamped = row_of(vec![dv_price(7, 20)]); + let row_in_range = row_of(vec![dv_price(7, 19)]); + assert_eq!( + row_number_i64(&row_clamped, 0).unwrap(), + row_number_i64(&row_in_range, 0).unwrap(), + ); + // Pin the absolute value too: 7 * 10^9 = 7_000_000_000. 
+ assert_eq!( + row_number_i64(&row_clamped, 0).unwrap(), + Some(7_000_000_000_i64) + ); +} + +/// Maximum scale-up under the clamped contract: `value=i32::MAX, +/// type=19` yields `i32::MAX * 10^9 = 2_147_483_647_000_000_000`, +/// which is below `i64::MAX = 9_223_372_036_854_775_807`. The product +/// must fit and decode bit-exact (no `TypeMismatch`). +#[test] +fn row_number_i64_max_in_range_price_fits_i64() { + let row = row_of(vec![dv_price(i32::MAX, 19)]); + assert_eq!( + row_number_i64(&row, 0).unwrap(), + Some(2_147_483_647_000_000_000_i64), + ); +} + +#[test] +fn parse_calendar_v3_holiday() { + // Simulate calendar_year response for a holiday (full_close). + let table = proto::DataTable { + headers: vec!["date".into(), "type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![ + dv_text("2025-01-01"), + dv_text("full_close"), + dv_null(), + dv_null(), + ])], + }; + + let days = parse_calendar_days_v3(&table).unwrap(); + assert_eq!(days.len(), 1); + let d = &days[0]; + assert_eq!(d.date, 20250101); + assert_eq!(d.is_open, 0); + assert_eq!(d.open_time, 0); + assert_eq!(d.close_time, 0); + assert_eq!(d.status, CALENDAR_STATUS_FULL_CLOSE); +} + +#[test] +fn parse_calendar_v3_open_day() { + // Simulate calendar_on_date response for a regular trading day. + // Note: on_date and open_today omit the "date" column. 
+ let table = proto::DataTable { + headers: vec!["type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![ + dv_text("open"), + dv_text("09:30:00"), + dv_text("16:00:00"), + ])], + }; + + let days = parse_calendar_days_v3(&table).unwrap(); + assert_eq!(days.len(), 1); + let d = &days[0]; + assert_eq!(d.date, 0); // no date column + assert_eq!(d.is_open, 1); + assert_eq!(d.open_time, 34_200_000); // 9:30 AM = 9*3600+30*60 = 34200 seconds = 34200000 ms + assert_eq!(d.close_time, 57_600_000); // 4:00 PM = 16*3600 = 57600 seconds = 57600000 ms + assert_eq!(d.status, CALENDAR_STATUS_OPEN); +} + +#[test] +fn parse_calendar_v3_early_close() { + // Simulate an early close day (day after Thanksgiving). + let table = proto::DataTable { + headers: vec!["date".into(), "type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![ + dv_text("2025-11-28"), + dv_text("early_close"), + dv_text("09:30:00"), + dv_text("13:00:00"), + ])], + }; + + let days = parse_calendar_days_v3(&table).unwrap(); + assert_eq!(days.len(), 1); + let d = &days[0]; + assert_eq!(d.date, 20251128); + assert_eq!(d.is_open, 1); + assert_eq!(d.open_time, 34_200_000); + assert_eq!(d.close_time, 46_800_000); // 1:00 PM = 13*3600 = 46800 seconds = 46800000 ms + assert_eq!(d.status, CALENDAR_STATUS_EARLY_CLOSE); +} + +#[test] +fn parse_calendar_v3_weekend() { + let table = proto::DataTable { + headers: vec!["type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![dv_text("weekend"), dv_null(), dv_null()])], + }; + + let days = parse_calendar_days_v3(&table).unwrap(); + assert_eq!(days.len(), 1); + let d = &days[0]; + assert_eq!(d.is_open, 0); + assert_eq!(d.status, CALENDAR_STATUS_WEEKEND); +} + +#[test] +fn parse_time_text_valid() { + assert_eq!(parse_time_text("09:30:00"), 34_200_000); + assert_eq!(parse_time_text("16:00:00"), 57_600_000); + assert_eq!(parse_time_text("13:00:00"), 46_800_000); + assert_eq!(parse_time_text("00:00:00"), 0); +} + +#[test] 
+fn parse_time_text_invalid_returns_zero() { + assert_eq!(parse_time_text("invalid"), 0); + assert_eq!(parse_time_text(""), 0); +} + +#[test] +fn parse_iso_date_yyyymmdd_passthrough_and_iso_split() { + assert_eq!(parse_iso_date("20260413"), 20260413); + assert_eq!(parse_iso_date("2026-04-13"), 20260413); + assert_eq!(parse_iso_date("not-a-date"), 0); +} + +#[test] +fn parse_trade_ticks_propagates_type_mismatch() { + // A Text cell in an i32 column is a schema violation — the parser + // must surface it, not silently coerce to 0. + let table = proto::DataTable { + headers: vec!["ms_of_day".into(), "price".into()], + data_table: vec![row_of(vec![dv_text("not-a-number"), dv_price(15000, 10)])], + }; + let err = parse_trade_ticks(&table).unwrap_err(); + assert!( + matches!(err, DecodeError::TypeMismatch { .. }), + "expected TypeMismatch, got {err:?}" + ); +} + +// ─────────── Unset-oneof is an error at every strict decode site ─────────── +// +// A `DataValue` with its `data_type` oneof unset is a wire-protocol +// anomaly (Java's `PojoMessageUtils.convert` default arm throws +// `IllegalArgumentException`). The helpers `row_number` / `row_date` / +// etc. already surface it as `TypeMismatch { observed: "Unset" }`. These +// tests pin the same behaviour on the call-sites that used to coalesce +// `NullValue | None` to zero: `parse_option_contracts_v3`, +// `parse_calendar_days_v3`, the generator-emitted EOD helpers, and the +// generator-emitted contract-id injected `expiration` / `right` fields. 
+ +#[test] +fn parse_option_contracts_v3_errors_on_unset_expiration() { + let table = proto::DataTable { + headers: vec!["root".into(), "expiration".into()], + data_table: vec![row_of(vec![dv_text("AAPL"), dv_missing()])], + }; + assert_eq!( + parse_option_contracts_v3(&table).unwrap_err(), + DecodeError::TypeMismatch { + column: 1, + expected: "Number|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_option_contracts_v3_errors_on_unset_right() { + let table = proto::DataTable { + headers: vec!["root".into(), "right".into()], + data_table: vec![row_of(vec![dv_text("AAPL"), dv_missing()])], + }; + assert_eq!( + parse_option_contracts_v3(&table).unwrap_err(), + DecodeError::TypeMismatch { + column: 1, + expected: "Number|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_calendar_days_v3_errors_on_unset_date() { + let table = proto::DataTable { + headers: vec!["date".into(), "type".into()], + data_table: vec![row_of(vec![dv_missing(), dv_text("open")])], + }; + assert_eq!( + parse_calendar_days_v3(&table).unwrap_err(), + DecodeError::TypeMismatch { + column: 0, + expected: "Number|Timestamp|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_calendar_days_v3_errors_on_unset_open_time() { + // `decode_calendar_time` is the helper covering both `open` and + // `close`; one test pins the shared path. + let table = proto::DataTable { + headers: vec!["type".into(), "open".into(), "close".into()], + data_table: vec![row_of(vec![ + dv_text("open"), + dv_missing(), + dv_text("16:00:00"), + ])], + }; + assert_eq!( + parse_calendar_days_v3(&table).unwrap_err(), + DecodeError::TypeMismatch { + column: 1, + expected: "Text|Number", + observed: "Unset", + } + ); +} + +#[test] +fn parse_eod_ticks_errors_on_unset_cell() { + // `parse_eod_ticks` is generator-emitted with the `eod_num` / + // `eod_date` / `eod_price` helpers; one test pins the shared path. 
+ let table = proto::DataTable { + headers: vec!["timestamp".into(), "open".into()], + data_table: vec![row_of(vec![dv_missing(), dv_number(15000)])], + }; + let err = parse_eod_ticks(&table).unwrap_err(); + assert_eq!( + err, + DecodeError::TypeMismatch { + column: 0, + expected: "Number|Price|Timestamp", + observed: "Unset", + } + ); +} + +#[test] +fn parse_trade_ticks_errors_on_unset_injected_expiration() { + // `parse_trade_ticks` is generator-emitted with `contract_id = true`; + // an `expiration` header in the server payload triggers the injected + // `expiration` / `strike` / `right` decode. An unset cell there used + // to coalesce to 0; now it must fail loud. + let table = proto::DataTable { + headers: vec!["ms_of_day".into(), "price".into(), "expiration".into()], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15000, 10), + dv_missing(), + ])], + }; + let err = parse_trade_ticks(&table).unwrap_err(); + assert_eq!( + err, + DecodeError::TypeMismatch { + column: 2, + expected: "Number|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_trade_ticks_errors_on_unset_injected_right() { + let table = proto::DataTable { + headers: vec!["ms_of_day".into(), "price".into(), "right".into()], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15000, 10), + dv_missing(), + ])], + }; + let err = parse_trade_ticks(&table).unwrap_err(); + assert_eq!( + err, + DecodeError::TypeMismatch { + column: 2, + expected: "Number|Text", + observed: "Unset", + } + ); +} + +#[test] +fn parse_greeks_all_ticks_decodes_price_encoded_greeks() { + // Regression: an earlier strict decode rejected Price cells for Greek + // columns, but the v3 MDDS server sends Greeks as Price-encoded + // values (mirroring Java's `dataValue2Object` -> BigDecimal path). + // Live run #24520486541 on main surfaced this as + // "column 13: expected Number, got Price" + // on `option_snapshot_greeks_first_order::bulk_chain` and peers. 
+ // Pin Price-cell decoding for both IV and a Greek so a future + // strict-Number tightening can't re-break it silently. + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_volatility".into(), + "delta".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + // IV = 0.1234 encoded with price_type = 6 (value * 10^-4). + dv_price(1234, 6), + // Delta = 0.5 encoded with price_type = 9 (value * 10^-1). + dv_price(5, 9), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + assert!((ticks[0].implied_volatility - 0.1234).abs() < 1e-10); + assert!((ticks[0].delta - 0.5).abs() < 1e-10); +} + +/// Pin the `implied_vol → implied_volatility` and `underlying_timestamp +/// → underlying_ms_of_day` aliases in `HEADER_ALIASES` by decoding a wire +/// payload whose headers use ONLY the v3 server-side names. If either +/// alias entry is dropped or mistyped, the matching schema field +/// silently zero-defaults via `opt_float` / `opt_number` (see the +/// generated `parse_greeks_all_ticks` body), and this test catches that +/// regression. +/// +/// The companion fixture-driven test +/// `crates/thetadatadx/tests/test_decode_captures.rs::greeks_all_*` +/// can't catch a broken `implied_vol` alias on its own because the +/// captured fixture's `first_row_implied_volatility` is `0.0` — a +/// missing alias and a real zero IV are indistinguishable there. +#[test] +fn parse_greeks_all_ticks_resolves_implied_vol_and_underlying_timestamp_aliases() { + // Headers use the v3 server-side names. Schema names + // (`implied_volatility`, `underlying_ms_of_day`) are deliberately + // absent so the parser MUST resolve them via `HEADER_ALIASES`. + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_vol".into(), + "underlying_timestamp".into(), + ], + // IV = 0.42 encoded with price_type = 6 (value * 10^-4). 
+ // underlying_timestamp epoch_ms 1_775_050_200_000 corresponds + // to 2026-04-01 09:30 ET, which `row_number` converts to + // ms-of-day 34_200_000 (matching `first_row_underlying_ms_of_day` + // in the option_history_greeks_all fixture meta). + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(4200, 6), + dv_timestamp(1_775_050_200_000), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + // Non-zero IV proves the `implied_vol` alias resolved; a broken + // alias would produce 0.0 from the `opt_float(None)` arm. + assert!( + (t.implied_volatility - 0.42).abs() < 1e-9, + "implied_vol alias did not resolve: got {}", + t.implied_volatility, + ); + // Non-zero ms-of-day proves the `underlying_timestamp` alias + // resolved; a broken alias would produce 0 from `opt_number(None)`. + assert_eq!(t.underlying_ms_of_day, 34_200_000); +} + +#[test] +fn parse_greeks_all_ticks_still_decodes_number_cells() { + // Companion to the Price-cell regression test: Number cells must + // still decode, matching Java's dispatch-on-wire-type semantics. + let table = proto::DataTable { + headers: vec!["ms_of_day".into(), "implied_volatility".into()], + data_table: vec![row_of(vec![dv_number(34_200_000), dv_number(0)])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + assert!(ticks[0].implied_volatility.abs() < 1e-10); +} + +/// Vendor wire shape for `option_*_greeks_first_order`: only the seven +/// first-order columns plus IV pair — vanna/charm/vomma/veta/speed/ +/// zomma/color/ultima/d1/d2/dual_delta/dual_gamma/vera are absent and +/// must default to `0.0` without surfacing any `find_header` warn. +/// Column layout pinned to `scripts/upstream_openapi.yaml` schema +/// `items_option_snapshot_greeks_first_order`. 
+#[test] +fn parse_greeks_all_ticks_decodes_first_order_subset_with_silent_gaps() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_volatility".into(), + "delta".into(), + "theta".into(), + "vega".into(), + "rho".into(), + "epsilon".into(), + "lambda".into(), + "iv_error".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(5023, 6), // delta = 0.5023 + dv_price(-114, 6), // theta = -0.0114 + dv_price(8741, 6), // vega = 0.8741 + dv_price(13598, 6), // rho = 1.3598 + dv_price(-1976, 6), // epsilon = -0.1976 + dv_price(32052, 6), // lambda = 3.2052 + dv_price(-3, 6), // iv_error = -3 / 10^4 = -0.0003 + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + // Wire-present columns: bit-exact against the input. + // `dv_price(value, 6)` decodes as `value * 10^(6-10) = value / 10000` + // (see `tdbe::types::price::Price::to_f64`). + assert_eq!(t.ms_of_day, 34_200_000); + assert!((t.implied_volatility - 0.2142).abs() < 1e-9); + assert!((t.delta - 0.5023).abs() < 1e-9); + assert!((t.theta - -0.0114).abs() < 1e-9); + assert!((t.vega - 0.8741).abs() < 1e-9); + assert!((t.rho - 1.3598).abs() < 1e-9); + assert!((t.epsilon - -0.1976).abs() < 1e-9); + assert!((t.lambda - 3.2052).abs() < 1e-9); + assert!((t.iv_error - -0.0003).abs() < 1e-9); + assert_eq!(t.date, 20_240_614); + + // Wire-absent columns: zero-defaulted. These are the columns the + // server does NOT publish for `_greeks_first_order` — `find_header` + // returning `None` for each must NOT yield an error and must NOT + // warn (the pre-fix behaviour spammed eight warn lines per row). 
+ assert_eq!(t.gamma, 0.0); + assert_eq!(t.vanna, 0.0); + assert_eq!(t.charm, 0.0); + assert_eq!(t.vomma, 0.0); + assert_eq!(t.veta, 0.0); + assert_eq!(t.speed, 0.0); + assert_eq!(t.zomma, 0.0); + assert_eq!(t.color, 0.0); + assert_eq!(t.ultima, 0.0); + assert_eq!(t.d1, 0.0); + assert_eq!(t.d2, 0.0); + assert_eq!(t.dual_delta, 0.0); + assert_eq!(t.dual_gamma, 0.0); + assert_eq!(t.vera, 0.0); +} + +/// Vendor wire shape for `option_*_greeks_second_order`: gamma / vanna +/// / charm / vomma / veta plus IV pair. Column layout pinned to +/// upstream OpenAPI schema `items_option_snapshot_greeks_second_order`. +#[test] +fn parse_greeks_all_ticks_decodes_second_order_subset_with_silent_gaps() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_volatility".into(), + "gamma".into(), + "vanna".into(), + "charm".into(), + "vomma".into(), + "veta".into(), + "iv_error".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(120, 6), // gamma = 0.012 + dv_price(45, 6), // vanna = 0.0045 + dv_price(-12, 6), // charm = -0.0012 + dv_price(900, 6), // vomma = 0.09 + dv_price(-3, 6), // veta = -0.0003 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert!((t.gamma - 0.012).abs() < 1e-9); + assert!((t.vanna - 0.0045).abs() < 1e-9); + assert!((t.charm - -0.0012).abs() < 1e-9); + assert!((t.vomma - 0.09).abs() < 1e-9); + assert!((t.veta - -0.0003).abs() < 1e-9); + + // First-order, third-order, and `_all`-only columns are absent + // on the wire and default to 0.0. + assert_eq!(t.delta, 0.0); + assert_eq!(t.speed, 0.0); + assert_eq!(t.zomma, 0.0); + assert_eq!(t.d1, 0.0); + assert_eq!(t.vera, 0.0); +} + +/// Vendor wire shape for `option_*_greeks_third_order`: speed / zomma / +/// color / ultima plus IV pair. 
This is the exact endpoint the Issue +/// #472 reporter was hitting — `option_snapshot_greeks_third_order` +/// previously emitted eight warn lines per row for the absent +/// first-order / second-order / `_all`-only columns. The test pins the +/// silent-gap behaviour so a future regression of `find_header` back +/// to `tracing::warn!` would surface here as a behavioural change. +/// Column layout pinned to upstream OpenAPI schema +/// `items_option_snapshot_greeks_third_order` (notably `vera` is NOT +/// in the third-order subset; it only ships in `_greeks_all`). +#[test] +fn parse_greeks_all_ticks_decodes_third_order_subset_with_silent_gaps() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "implied_volatility".into(), + "speed".into(), + "zomma".into(), + "color".into(), + "ultima".into(), + "iv_error".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(7, 6), // speed = 0.0007 + dv_price(15, 6), // zomma = 0.0015 + dv_price(-2, 6), // color = -0.0002 + dv_price(33, 6), // ultima = 0.0033 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_all_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert!((t.speed - 0.0007).abs() < 1e-9); + assert!((t.zomma - 0.0015).abs() < 1e-9); + assert!((t.color - -0.0002).abs() < 1e-9); + assert!((t.ultima - 0.0033).abs() < 1e-9); + + // Vera is NOT a third-order column on the wire even though the + // generic `GreeksTick` struct carries the field. It must default + // to 0.0 here without warning. + assert_eq!(t.vera, 0.0); + // First-order and second-order columns also absent. 
+ assert_eq!(t.delta, 0.0); + assert_eq!(t.gamma, 0.0); + assert_eq!(t.vanna, 0.0); + assert_eq!(t.d1, 0.0); + assert_eq!(t.dual_gamma, 0.0); +} + +/// `parse_greeks_first_order_ticks` against the column subset the +/// vendor publishes for `option_*_greeks_first_order` -- pinned to +/// `items_option_snapshot_greeks_first_order` in the upstream OpenAPI. +/// Asserts every column the parser fills decodes to the exact value +/// from the input row, and that the underlying-snapshot pair is +/// populated (the column subset is what differs from `_greeks_all`, +/// not the underlying tail). +#[test] +fn parse_greeks_first_order_ticks_decodes_first_order_subset() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "bid".into(), + "ask".into(), + "delta".into(), + "theta".into(), + "vega".into(), + "rho".into(), + "epsilon".into(), + "lambda".into(), + "implied_volatility".into(), + "iv_error".into(), + "underlying_ms_of_day".into(), + "underlying_price".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15022, 6), // bid = 1.5022 + dv_price(15041, 6), // ask = 1.5041 + dv_price(5023, 6), // delta = 0.5023 + dv_price(-114, 6), // theta = -0.0114 + dv_price(8741, 6), // vega = 0.8741 + dv_price(13598, 6), // rho = 1.3598 + dv_price(-1976, 6), // epsilon = -0.1976 + dv_price(32052, 6), // lambda = 3.2052 + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(34_200_001), + dv_price(580025, 6), // underlying_price = 58.0025 + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_first_order_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert_eq!(t.ms_of_day, 34_200_000); + assert!((t.bid - 1.5022).abs() < 1e-9); + assert!((t.ask - 1.5041).abs() < 1e-9); + assert!((t.delta - 0.5023).abs() < 1e-9); + assert!((t.theta - -0.0114).abs() < 1e-9); + assert!((t.vega - 0.8741).abs() < 1e-9); + assert!((t.rho - 
1.3598).abs() < 1e-9); + assert!((t.epsilon - -0.1976).abs() < 1e-9); + assert!((t.lambda - 3.2052).abs() < 1e-9); + assert!((t.implied_volatility - 0.2142).abs() < 1e-9); + assert!((t.iv_error - -0.0003).abs() < 1e-9); + assert_eq!(t.underlying_ms_of_day, 34_200_001); + assert!((t.underlying_price - 58.0025).abs() < 1e-9); + assert_eq!(t.date, 20_240_614); +} + +/// `parse_greeks_second_order_ticks` against the column subset the +/// vendor publishes for `option_*_greeks_second_order` -- pinned to +/// `items_option_snapshot_greeks_second_order` in the upstream +/// OpenAPI. Second-order Greeks: gamma / vanna / charm / vomma / +/// veta plus the IV pair and the bid/ask quote pair. +#[test] +fn parse_greeks_second_order_ticks_decodes_second_order_subset() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "bid".into(), + "ask".into(), + "gamma".into(), + "vanna".into(), + "charm".into(), + "vomma".into(), + "veta".into(), + "implied_volatility".into(), + "iv_error".into(), + "underlying_ms_of_day".into(), + "underlying_price".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15022, 6), // bid = 1.5022 + dv_price(15041, 6), // ask = 1.5041 + dv_price(120, 6), // gamma = 0.012 + dv_price(45, 6), // vanna = 0.0045 + dv_price(-12, 6), // charm = -0.0012 + dv_price(900, 6), // vomma = 0.09 + dv_price(-3, 6), // veta = -0.0003 + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(34_200_001), + dv_price(580025, 6), + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_second_order_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert_eq!(t.ms_of_day, 34_200_000); + assert!((t.bid - 1.5022).abs() < 1e-9); + assert!((t.ask - 1.5041).abs() < 1e-9); + assert!((t.gamma - 0.012).abs() < 1e-9); + assert!((t.vanna - 0.0045).abs() < 1e-9); + assert!((t.charm - -0.0012).abs() < 1e-9); + assert!((t.vomma - 0.09).abs() 
< 1e-9); + assert!((t.veta - -0.0003).abs() < 1e-9); + assert!((t.implied_volatility - 0.2142).abs() < 1e-9); + assert!((t.iv_error - -0.0003).abs() < 1e-9); + assert_eq!(t.underlying_ms_of_day, 34_200_001); + assert!((t.underlying_price - 58.0025).abs() < 1e-9); + assert_eq!(t.date, 20_240_614); +} + +/// `parse_greeks_third_order_ticks` against the column subset the +/// vendor publishes for `option_*_greeks_third_order` -- pinned to +/// `items_option_snapshot_greeks_third_order` in the upstream +/// OpenAPI. Third-order Greeks: speed / zomma / color / ultima plus +/// the IV pair and the bid/ask quote pair. Notably the wire schema +/// does NOT publish `vera`; the struct does not carry it either. +#[test] +fn parse_greeks_third_order_ticks_decodes_third_order_subset() { + let table = proto::DataTable { + headers: vec![ + "ms_of_day".into(), + "bid".into(), + "ask".into(), + "speed".into(), + "zomma".into(), + "color".into(), + "ultima".into(), + "implied_volatility".into(), + "iv_error".into(), + "underlying_ms_of_day".into(), + "underlying_price".into(), + "date".into(), + ], + data_table: vec![row_of(vec![ + dv_number(34_200_000), + dv_price(15022, 6), // bid = 1.5022 + dv_price(15041, 6), // ask = 1.5041 + dv_price(7, 6), // speed = 0.0007 + dv_price(15, 6), // zomma = 0.0015 + dv_price(-2, 6), // color = -0.0002 + dv_price(33, 6), // ultima = 0.0033 + dv_price(2142, 6), // implied_volatility = 0.2142 + dv_price(-3, 6), // iv_error = -0.0003 + dv_number(34_200_001), + dv_price(580025, 6), + dv_number(20_240_614), + ])], + }; + let ticks = parse_greeks_third_order_ticks(&table).unwrap(); + assert_eq!(ticks.len(), 1); + let t = &ticks[0]; + + assert_eq!(t.ms_of_day, 34_200_000); + assert!((t.bid - 1.5022).abs() < 1e-9); + assert!((t.ask - 1.5041).abs() < 1e-9); + assert!((t.speed - 0.0007).abs() < 1e-9); + assert!((t.zomma - 0.0015).abs() < 1e-9); + assert!((t.color - -0.0002).abs() < 1e-9); + assert!((t.ultima - 0.0033).abs() < 1e-9); + 
assert!((t.implied_volatility - 0.2142).abs() < 1e-9); + assert!((t.iv_error - -0.0003).abs() < 1e-9); + assert_eq!(t.underlying_ms_of_day, 34_200_001); + assert!((t.underlying_price - 58.0025).abs() < 1e-9); + assert_eq!(t.date, 20_240_614); +} diff --git a/crates/thetadatadx/src/mdds/decode/transport.rs b/crates/thetadatadx/src/mdds/decode/transport.rs new file mode 100644 index 00000000..5f5b9ffe --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/transport.rs @@ -0,0 +1,91 @@ +//! `ResponseData` decompression and `DataTable` decode. +//! +//! Recycles a thread-local zstd decompressor and output buffer so repeated +//! decompressions of similar-sized payloads avoid allocator pressure on the +//! working buffer. + +use std::cell::RefCell; + +use crate::error::Error; +use crate::proto; + +thread_local! { + /// Reusable zstd decompressor **and** output buffer — avoids allocating both + /// a fresh decompressor context and a fresh `Vec` on every call. + /// + /// The decompressor context (~128 KB of zstd internal state) is recycled, and + /// the output buffer retains its capacity across calls so that repeated + /// decompressions of similar-sized payloads hit no allocator at all. + /// + /// We use `decompress_to_buffer` which writes into the pre-existing Vec + /// without reallocating when capacity is sufficient. The final `.clone()` + /// is necessary since we return ownership, but the internal buffer capacity + /// persists across calls — the key win is avoiding repeated alloc/dealloc + /// cycles for the working buffer. + static ZSTD_STATE: RefCell<(zstd::bulk::Decompressor<'static>, Vec)> = RefCell::new(( + // Infallible in practice: zstd decompressor creation only fails on OOM. + // thread_local! does not support Result, so unwrap is intentional here. + zstd::bulk::Decompressor::new().expect("zstd decompressor creation failed (possible OOM)"), + Vec::with_capacity(1024 * 1024), // 1 MB initial capacity + )); +} + +/// Decompress a `ResponseData` payload. 
Returns the raw protobuf bytes of the `DataTable`. +/// +/// # Unknown compression algorithms +/// +/// Prost's `.algo()` silently maps unknown enum values to the default (None=0), +/// so we check the raw i32 to detect truly unknown algorithms. Without this, +/// an unrecognized algorithm would be treated as uncompressed, producing garbage. +/// +/// # Buffer recycling +/// +/// Uses a thread-local `(Decompressor, Vec)` pair. The `Vec` retains its +/// capacity across calls, so repeated decompressions of similar-sized payloads +/// avoid hitting the allocator for the working buffer. The returned `Vec` +/// is a clone (we must return ownership), but the internal slab persists. +/// # Errors +/// +/// Returns [`Error::Decompress`] if the compression algorithm is unknown or +/// zstd decompression fails. +// Reason: original_size is a protobuf u64 that fits in usize for valid payloads. +#[allow(clippy::cast_possible_truncation)] +pub fn decompress_response(response: &proto::ResponseData) -> Result, Error> { + let algo_raw = response + .compression_description + .as_ref() + .map_or(0, |cd| cd.algo); + + match proto::CompressionAlgo::try_from(algo_raw) { + Ok(proto::CompressionAlgo::None) => Ok(response.compressed_data.clone()), + Ok(proto::CompressionAlgo::Zstd) => { + let original_size = usize::try_from(response.original_size).unwrap_or(0); + ZSTD_STATE.with(|cell| { + let (ref mut dec, ref mut buf) = *cell.borrow_mut(); + buf.clear(); + buf.resize(original_size, 0); + let n = dec + .decompress_to_buffer(&response.compressed_data, buf) + .map_err(|e| Error::Decompress(e.to_string()))?; + buf.truncate(n); + Ok(buf.clone()) + }) + } + _ => Err(Error::Decompress(format!( + "unknown compression algorithm: {algo_raw}" + ))), + } +} + +/// Decode a `ResponseData` into a `DataTable`. +/// +/// # Errors +/// +/// Returns [`Error::Decompress`] if decompression fails or [`Error::Decode`] +/// if protobuf deserialization fails. 
+pub fn decode_data_table(response: &proto::ResponseData) -> Result { + let bytes = decompress_response(response)?; + let table: proto::DataTable = + prost::Message::decode(bytes.as_slice()).map_err(|e| Error::Decode(e.to_string()))?; + Ok(table) +} diff --git a/crates/thetadatadx/src/mdds/decode/v3.rs b/crates/thetadatadx/src/mdds/decode/v3.rs new file mode 100644 index 00000000..7399905e --- /dev/null +++ b/crates/thetadatadx/src/mdds/decode/v3.rs @@ -0,0 +1,339 @@ +//! Hand-written parsers for v3 MDDS payload shapes that the macro-generated +//! parser cannot model directly. +//! +//! v3 publishes some columns as text (ISO dates, "PUT"/"CALL" rights, the +//! calendar `type` column) where the schema would otherwise expect numeric +//! cells. The hand-written parsers here dispatch on the cell's own wire +//! type, surfacing mismatches as [`DecodeError::TypeMismatch`] rather than +//! coalescing silently. + +use crate::proto; +use tdbe::types::tick::{CalendarDay, OptionContract}; + +use super::cell::{cell_type, row_price_f64, row_text}; +use super::error::{observed_name, DecodeError}; +use super::headers::find_header; + +/// Hand-written parser for `OptionContract` that handles the v3 server's +/// text-formatted fields (expiration as ISO date, right as "PUT"/"CALL"). +/// +/// The `expiration` and `right` columns legitimately arrive as either `Number` +/// or `Text` depending on the upstream version, so the parser dispatches on +/// the cell's own type rather than coalescing silently. Mismatched types +/// propagate as [`DecodeError::TypeMismatch`]. +/// +/// # Errors +/// +/// Returns [`DecodeError`] on type mismatch or missing cell. 
+pub fn parse_option_contracts_v3( + table: &crate::proto::DataTable, +) -> Result, DecodeError> { + let h: Vec<&str> = table + .headers + .iter() + .map(std::string::String::as_str) + .collect(); + + // Same schema-drift guard as the generated parsers: "no contracts today" + // is legitimate, but a rows-present response missing the required `root` + // column is a silent data-loss trap. The wire column is still named + // `root` (or `symbol` via the v3 alias in `decode::HEADER_ALIASES`); the + // `symbol` binding here is the public-API field name documented in the + // v3 vendor migration guide. + let symbol_idx = match find_header(&h, "root") { + Some(i) => i, + None => { + if table.data_table.is_empty() { + return Ok(vec![]); + } + return Err(DecodeError::MissingRequiredHeader { + header: "root", + rows: table.data_table.len(), + available: h.join(","), + }); + } + }; + let exp_idx = find_header(&h, "expiration"); + let strike_idx = find_header(&h, "strike"); + let right_idx = find_header(&h, "right"); + + table + .data_table + .iter() + .map(|row| { + let symbol = row_text(row, symbol_idx)?.unwrap_or_default(); + + // Expiration: `Number` carries YYYYMMDD directly; `Text` carries + // an ISO "2026-04-13" that we parse here. `NullValue` → 0 (legit + // null, coalesce). An unset oneof is a wire anomaly → TypeMismatch. + let expiration = match exp_idx { + Some(i) => match cell_type(row, i)? 
{ + Some(proto::data_value::DataType::Number(n)) => *n as i32, + Some(proto::data_value::DataType::Text(s)) => parse_iso_date(s), + Some(proto::data_value::DataType::NullValue(_)) => 0, + None => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Text", + observed: "Unset", + }); + } + other => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Text", + observed: observed_name(other), + }); + } + }, + None => 0, + }; + + let strike = match strike_idx { + Some(i) => row_price_f64(row, i)?.unwrap_or(0.0), + None => 0.0, + }; + + // Right: `Number` carries the ASCII code directly; `Text` carries + // "PUT"/"CALL"/"P"/"C". `NullValue` / unknown text → 0. An unset + // oneof is a wire anomaly → TypeMismatch. + let right = match right_idx { + Some(i) => match cell_type(row, i)? { + Some(proto::data_value::DataType::Number(n)) => *n as i32, + Some(proto::data_value::DataType::Text(s)) => match s.as_str() { + "CALL" | "C" => 67, // ASCII 'C' + "PUT" | "P" => 80, // ASCII 'P' + _ => 0, + }, + Some(proto::data_value::DataType::NullValue(_)) => 0, + None => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Text", + observed: "Unset", + }); + } + other => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Text", + observed: observed_name(other), + }); + } + }, + None => 0, + }; + + Ok(OptionContract { + symbol, + expiration, + strike, + right, + }) + }) + .collect() +} + +/// Parse an ISO date string "2026-04-13" to YYYYMMDD integer 20260413. +// Reason: date parsing with known-safe integer ranges. 
+#[allow(clippy::cast_possible_truncation, clippy::missing_panics_doc)] +pub(crate) fn parse_iso_date(s: &str) -> i32 { + // Fast path: already numeric (YYYYMMDD) + if let Ok(n) = s.parse::() { + return n; + } + // ISO format: YYYY-MM-DD + let parts: Vec<&str> = s.split('-').collect(); + if parts.len() == 3 { + if let (Ok(y), Ok(m), Ok(d)) = ( + parts[0].parse::(), + parts[1].parse::(), + parts[2].parse::(), + ) { + return y * 10_000 + m * 100 + d; + } + } + 0 +} + +/// Parse a time string "HH:MM:SS" to milliseconds from midnight. +pub(crate) fn parse_time_text(s: &str) -> i32 { + let parts: Vec<&str> = s.split(':').collect(); + if parts.len() == 3 { + if let (Ok(h), Ok(m), Ok(sec)) = ( + parts[0].parse::(), + parts[1].parse::(), + parts[2].parse::(), + ) { + return (h * 3_600 + m * 60 + sec) * 1_000; + } + } + 0 +} + +/// Calendar day status constants. +/// +/// The v3 MDDS server sends a `type` column with text values. We map them to +/// integer constants for the `CalendarDay.status` field: +/// +/// | Server text | Constant | Meaning | +/// |----------------|----------|-----------------------------------| +/// | `"open"` | `0` | Normal trading day | +/// | `"early_close"`| `1` | Early close (e.g. day after Thanksgiving) | +/// | `"full_close"` | `2` | Market closed (holiday) | +/// | `"weekend"` | `3` | Weekend | +/// | (unknown) | `-1` | Unrecognized status text | +pub const CALENDAR_STATUS_OPEN: i32 = 0; +pub const CALENDAR_STATUS_EARLY_CLOSE: i32 = 1; +pub const CALENDAR_STATUS_FULL_CLOSE: i32 = 2; +pub const CALENDAR_STATUS_WEEKEND: i32 = 3; +pub const CALENDAR_STATUS_UNKNOWN: i32 = -1; + +/// Map a v3 calendar `type` text to `(is_open, status)`. 
+fn calendar_type_text(s: &str) -> (i32, i32) { + match s { + "open" => (1, CALENDAR_STATUS_OPEN), + "early_close" => (1, CALENDAR_STATUS_EARLY_CLOSE), + "full_close" => (0, CALENDAR_STATUS_FULL_CLOSE), + "weekend" => (0, CALENDAR_STATUS_WEEKEND), + _ => (0, CALENDAR_STATUS_UNKNOWN), + } +} + +/// Hand-written parser for `CalendarDay` that handles the v3 server's +/// text-formatted fields. +/// +/// The v3 MDDS server sends calendar data with different column names and types +/// than the generated parser expects: +/// +/// | Schema field | Server header | Server type | Mapping | +/// |--------------|---------------|-------------|---------------------------------------| +/// | `date` | `date` | Text | "2025-01-01" -> 20250101 | +/// | `is_open` | `type` | Text | "`open"/"early_close`" -> 1, else -> 0 | +/// | `open_time` | `open` | Text / Null | "09:30:00" -> 34200000 ms | +/// | `close_time` | `close` | Text / Null | "16:00:00" -> 57600000 ms | +/// | `status` | `type` | Text | See [`CALENDAR_STATUS_OPEN`] etc. | +/// +/// Note: `calendar_on_date` and `calendar_open_today` omit the `date` column. +/// Each column dispatches on the cell's own type rather than coalescing +/// silently — mismatched types propagate as [`DecodeError::TypeMismatch`]. +/// +/// # Errors +/// +/// Returns [`DecodeError`] on type mismatch or missing cell. +pub fn parse_calendar_days_v3( + table: &crate::proto::DataTable, +) -> Result, DecodeError> { + let h: Vec<&str> = table + .headers + .iter() + .map(std::string::String::as_str) + .collect(); + + let date_idx = h.iter().position(|&s| s == "date"); + let type_idx = h.iter().position(|&s| s == "type"); + let open_idx = h.iter().position(|&s| s == "open"); + let close_idx = h.iter().position(|&s| s == "close"); + + table + .data_table + .iter() + .map(|row| { + // date: Number carries YYYYMMDD, Timestamp converts to ET date, + // Text "2025-01-01" parses to YYYYMMDD. `NullValue` → 0 (legit + // null). 
Unset oneof is a wire anomaly → TypeMismatch. + let date = match date_idx { + Some(i) => match cell_type(row, i)? { + Some(proto::data_value::DataType::Number(n)) => *n as i32, + Some(proto::data_value::DataType::Timestamp(ts)) => { + tdbe::time::timestamp_to_date(ts.epoch_ms) + } + Some(proto::data_value::DataType::Text(s)) => parse_iso_date(s), + Some(proto::data_value::DataType::NullValue(_)) => 0, + None => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Timestamp|Text", + observed: "Unset", + }); + } + other => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Number|Timestamp|Text", + observed: observed_name(other), + }); + } + }, + None => 0, + }; + + // type: Text "open"/"full_close"/"early_close"/"weekend"; Number + // kept as a future-proofing path. `NullValue` → (0, 0). Unset + // oneof is a wire anomaly → TypeMismatch. + let (is_open, status) = match type_idx { + Some(i) => match cell_type(row, i)? { + Some(proto::data_value::DataType::Text(s)) => calendar_type_text(s), + Some(proto::data_value::DataType::Number(n)) => { + let n = *n as i32; + (i32::from(n != 0), n) + } + Some(proto::data_value::DataType::NullValue(_)) => (0, 0), + None => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Text|Number", + observed: "Unset", + }); + } + other => { + return Err(DecodeError::TypeMismatch { + column: i, + expected: "Text|Number", + observed: observed_name(other), + }); + } + }, + None => (0, 0), + }; + + let open_time = decode_calendar_time(row, open_idx)?; + let close_time = decode_calendar_time(row, close_idx)?; + + Ok(CalendarDay { + date, + is_open, + open_time, + close_time, + status, + }) + }) + .collect() +} + +/// Decode a calendar `open`/`close` column. `Text "HH:MM:SS"` → ms-of-day; +/// `Number` kept as future-proofing. `NullValue` / absent column → 0. An unset +/// oneof is a wire anomaly → [`DecodeError::TypeMismatch`]. 
+fn decode_calendar_time( + row: &proto::DataValueList, + idx: Option, +) -> Result { + let Some(i) = idx else { + return Ok(0); + }; + match cell_type(row, i)? { + Some(proto::data_value::DataType::Text(s)) => Ok(parse_time_text(s)), + Some(proto::data_value::DataType::Number(n)) => Ok(*n as i32), + Some(proto::data_value::DataType::NullValue(_)) => Ok(0), + None => Err(DecodeError::TypeMismatch { + column: i, + expected: "Text|Number", + observed: "Unset", + }), + other => Err(DecodeError::TypeMismatch { + column: i, + expected: "Text|Number", + observed: observed_name(other), + }), + } +} diff --git a/crates/thetadatadx/src/mdds/mod.rs b/crates/thetadatadx/src/mdds/mod.rs index b28cbae1..fd213fee 100644 --- a/crates/thetadatadx/src/mdds/mod.rs +++ b/crates/thetadatadx/src/mdds/mod.rs @@ -37,6 +37,7 @@ //! live in the in-crate `wire_semantics` module. mod client; +pub mod decode; mod endpoints; mod stream; mod validate; diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 29837eda..6815d4d9 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [8.0.34] - 2026-05-07 + +### Changed + +- **`crates/thetadatadx/src/decode.rs` (2177 LoC) split into + `mdds/decode/{error,headers,transport,extract,cell,v3}` modules.** + Pure structural refactor; public API unchanged. Re-exports + preserved at `thetadatadx::mdds::decode::*`. + +- **Eastern-time + DST primitives lifted to `tdbe::time`.** + `eastern_offset_ms`, `march_second_sunday_utc`, + `november_first_sunday_utc`, `april_first_sunday_utc`, + `october_last_sunday_utc`, `civil_to_epoch_days`, + `timestamp_to_ms_of_day`, `timestamp_to_date` — single canonical + module reused by mdds, fpss, flatfiles. 
Patch bump tdbe 0.12.9 + → 0.12.10. + + Refs #500. + ## [8.0.33] - 2026-05-07 ### Added diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index 1f00d91f..091c419a 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-ffi" -version = "8.0.33" +version = "8.0.34" edition.workspace = true rust-version.workspace = true authors.workspace = true @@ -31,7 +31,7 @@ testing-panic-boundary = [] [dependencies] thetadatadx = { path = "../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../crates/tdbe" } tokio = { version = "1.52.1", features = ["rt-multi-thread"] } # Used by the FPSS streaming callback silent-drop observability path # (see `tdx_fpss_dropped_events` / `tdx_unified_dropped_events`). Keep diff --git a/sdks/python/Cargo.lock b/sdks/python/Cargo.lock index 12876611..ad2ef2c9 100644 --- a/sdks/python/Cargo.lock +++ b/sdks/python/Cargo.lock @@ -2474,7 +2474,7 @@ checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ "sonic-rs", "thiserror 2.0.18", @@ -2495,7 +2495,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.33" +version = "8.0.34" dependencies = [ "crossbeam-channel", "disruptor", @@ -2530,7 +2530,7 @@ dependencies = [ [[package]] name = "thetadatadx-py" -version = "8.0.33" +version = "8.0.34" dependencies = [ "arrow", "arrow-array", diff --git a/sdks/python/Cargo.toml b/sdks/python/Cargo.toml index f554d8ed..93cc590f 100644 --- a/sdks/python/Cargo.toml +++ b/sdks/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-py" -version = "8.0.33" +version = "8.0.34" edition = "2021" description = "Python bindings for thetadatadx — native ThetaData SDK powered by Rust" license = "Apache-2.0" @@ -19,7 +19,7 @@ doc = false [dependencies] # The Rust SDK we're wrapping thetadatadx = { path = "../../crates/thetadatadx" } -tdbe = { 
version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } # Direct prost dep for decoding `thetadatadx::proto::ResponseData` bytes in # the `decode_response_bytes` hook. The main crate no longer re-exports diff --git a/sdks/typescript/Cargo.lock b/sdks/typescript/Cargo.lock index 729975d7..d11a63e2 100644 --- a/sdks/typescript/Cargo.lock +++ b/sdks/typescript/Cargo.lock @@ -2117,7 +2117,7 @@ dependencies = [ [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ "sonic-rs", "thiserror 2.0.18", @@ -2138,7 +2138,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.33" +version = "8.0.34" dependencies = [ "crossbeam-channel", "disruptor", @@ -2173,7 +2173,7 @@ dependencies = [ [[package]] name = "thetadatadx-napi" -version = "8.0.33" +version = "8.0.34" dependencies = [ "chrono", "napi", diff --git a/sdks/typescript/Cargo.toml b/sdks/typescript/Cargo.toml index 1eff188c..6a62e10d 100644 --- a/sdks/typescript/Cargo.toml +++ b/sdks/typescript/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-napi" -version = "8.0.33" +version = "8.0.34" edition = "2021" description = "TypeScript/Node.js bindings for thetadatadx — native ThetaData SDK powered by Rust" license = "Apache-2.0" @@ -13,7 +13,7 @@ crate-type = ["cdylib"] [dependencies] thetadatadx = { path = "../../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } napi = { version = "3.8.5", features = ["async", "tokio_rt", "serde-json", "napi6", "chrono_date"] } napi-derive = "3.5.4" diff --git a/sdks/typescript/npm/darwin-arm64/package.json b/sdks/typescript/npm/darwin-arm64/package.json index a17d0674..799d2482 100644 --- a/sdks/typescript/npm/darwin-arm64/package.json +++ b/sdks/typescript/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "thetadatadx-darwin-arm64", - "version": "8.0.33", + "version": "8.0.34", "os": [ 
"darwin" ], diff --git a/sdks/typescript/npm/linux-x64-gnu/package.json b/sdks/typescript/npm/linux-x64-gnu/package.json index ff440a04..aa5382c1 100644 --- a/sdks/typescript/npm/linux-x64-gnu/package.json +++ b/sdks/typescript/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "thetadatadx-linux-x64-gnu", - "version": "8.0.33", + "version": "8.0.34", "os": [ "linux" ], diff --git a/sdks/typescript/npm/win32-x64-msvc/package.json b/sdks/typescript/npm/win32-x64-msvc/package.json index 05af0ec9..a388edf6 100644 --- a/sdks/typescript/npm/win32-x64-msvc/package.json +++ b/sdks/typescript/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "thetadatadx-win32-x64-msvc", - "version": "8.0.33", + "version": "8.0.34", "os": [ "win32" ], diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json index 069868ea..e6d3a987 100644 --- a/sdks/typescript/package.json +++ b/sdks/typescript/package.json @@ -1,6 +1,6 @@ { "name": "thetadatadx", - "version": "8.0.33", + "version": "8.0.34", "description": "Native ThetaData SDK for Node.js — powered by Rust via napi-rs", "license": "Apache-2.0", "repository": { @@ -30,9 +30,9 @@ "@napi-rs/cli": "^3.6.2" }, "optionalDependencies": { - "thetadatadx-linux-x64-gnu": "8.0.33", - "thetadatadx-darwin-arm64": "8.0.33", - "thetadatadx-win32-x64-msvc": "8.0.33" + "thetadatadx-linux-x64-gnu": "8.0.34", + "thetadatadx-darwin-arm64": "8.0.34", + "thetadatadx-win32-x64-msvc": "8.0.34" }, "engines": { "node": ">= 20" diff --git a/tools/cli/Cargo.toml b/tools/cli/Cargo.toml index 53db62a1..dd4aa65f 100644 --- a/tools/cli/Cargo.toml +++ b/tools/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-cli" -version = "8.0.33" +version = "8.0.34" edition.workspace = true rust-version.workspace = true authors.workspace = true @@ -21,7 +21,7 @@ path = "src/main.rs" [dependencies] thetadatadx = { path = "../../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = 
"../../crates/tdbe" } clap = { version = "4.6.1", features = ["derive"] } tokio = { version = "1.52.1", features = ["rt-multi-thread", "macros"] } sonic-rs = "0.5.8" diff --git a/tools/mcp/Cargo.lock b/tools/mcp/Cargo.lock index 0be3e784..286ce49f 100644 --- a/tools/mcp/Cargo.lock +++ b/tools/mcp/Cargo.lock @@ -1938,7 +1938,7 @@ dependencies = [ [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ "sonic-rs", "thiserror 2.0.18", @@ -1959,7 +1959,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.33" +version = "8.0.34" dependencies = [ "crossbeam-channel", "disruptor", @@ -1994,7 +1994,7 @@ dependencies = [ [[package]] name = "thetadatadx-mcp" -version = "8.0.33" +version = "8.0.34" dependencies = [ "serde", "sonic-rs", diff --git a/tools/mcp/Cargo.toml b/tools/mcp/Cargo.toml index cceb0153..612f129d 100644 --- a/tools/mcp/Cargo.toml +++ b/tools/mcp/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-mcp" -version = "8.0.33" +version = "8.0.34" edition = "2021" description = "MCP server for ThetaDataDx — gives LLMs instant access to ThetaData market data" license = "Apache-2.0" @@ -12,7 +12,7 @@ path = "src/main.rs" [dependencies] thetadatadx = { path = "../../crates/thetadatadx" } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } tokio = { version = "1.52.1", features = ["rt-multi-thread", "macros", "io-util", "io-std"] } serde = { version = "1.0.228", features = ["derive"] } sonic-rs = "0.5.8" diff --git a/tools/server/Cargo.lock b/tools/server/Cargo.lock index 796d47b8..125b1a8b 100644 --- a/tools/server/Cargo.lock +++ b/tools/server/Cargo.lock @@ -2331,7 +2331,7 @@ dependencies = [ [[package]] name = "tdbe" -version = "0.12.9" +version = "0.12.10" dependencies = [ "sonic-rs", "thiserror 2.0.18", @@ -2352,7 +2352,7 @@ dependencies = [ [[package]] name = "thetadatadx" -version = "8.0.33" +version = "8.0.34" dependencies = [ 
"crossbeam-channel", "disruptor", @@ -2387,7 +2387,7 @@ dependencies = [ [[package]] name = "thetadatadx-server" -version = "8.0.33" +version = "8.0.34" dependencies = [ "axum", "clap", diff --git a/tools/server/Cargo.toml b/tools/server/Cargo.toml index 930a39aa..624fbd37 100644 --- a/tools/server/Cargo.toml +++ b/tools/server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thetadatadx-server" -version = "8.0.33" +version = "8.0.34" edition = "2021" rust-version = "1.85" authors = ["userFRM"] @@ -21,7 +21,7 @@ path = "src/main.rs" [dependencies] thetadatadx = { path = "../../crates/thetadatadx", features = ["config-file"] } rustls = { version = "0.23.38", features = ["ring"] } -tdbe = { version = "0.12.9", path = "../../crates/tdbe" } +tdbe = { version = "0.12.10", path = "../../crates/tdbe" } axum = { version = "0.8.9", features = ["ws"] } tokio = { version = "1.52.1", features = ["full"] } sonic-rs = "0.5.8"