Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions src/mcp_massive/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,21 +169,42 @@ def from_records(cls, records: list[dict]) -> "Table":

Preserves insertion order of keys across all records.
Fills missing keys with None.
When two columns differ only in case (e.g. ``T`` and ``t``),
the later column is renamed with a ``_2`` suffix to avoid
SQLite case-insensitive collisions.
"""
if not records:
return cls([], {})
# Collect all keys in insertion order
# Collect all keys in insertion order (original casing)
seen: set[str] = set()
columns: list[str] = []
raw_columns: list[str] = []
for rec in records:
for key in rec:
if key not in seen:
seen.add(key)
columns.append(key)
raw_columns.append(key)

# Deduplicate case-insensitive collisions for SQLite compatibility
ci_seen: set[str] = set()
columns: list[str] = []
col_map: dict[str, str] = {} # original key -> final column name
for col in raw_columns:
if col.lower() in ci_seen:
renamed = f"{col}_2"
while renamed.lower() in ci_seen:
renamed += "_2"
columns.append(renamed)
ci_seen.add(renamed.lower())
col_map[col] = renamed
else:
columns.append(col)
ci_seen.add(col.lower())
col_map[col] = col

data: dict[str, list] = {col: [] for col in columns}
for rec in records:
for col in columns:
data[col].append(rec.get(col))
for raw_col in raw_columns:
data[col_map[raw_col]].append(rec.get(raw_col))
return cls(columns, data)

def __len__(self) -> int:
Expand Down
44 changes: 44 additions & 0 deletions tests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,28 @@ def test_from_records_empty_list(self):
assert t.columns == []
assert len(t) == 0

def test_from_records_preserves_casing(self):
    """Column names keep the exact casing used by the record keys."""
    record = {"Name": "Alice", "AGE": 30}
    table = Table.from_records([record])

    # Column order follows key insertion order; casing is untouched.
    assert table.columns == ["Name", "AGE"]

    expected_values = {"Name": ["Alice"], "AGE": [30]}
    for column, values in expected_values.items():
        assert table[column] == values

def test_from_records_deduplicates_case_insensitive_columns(self):
    """Keys differing only by case get a ``_2`` suffix on the later one.

    Massive.com returns both T (ticker) and t (timestamp) in one record.
    """
    record = {"T": "AAPL", "v": 100.0, "t": 1704067200000}
    table = Table.from_records([record])

    # "t" arrives after "T", so it is the one that gets renamed.
    assert table.columns == ["T", "v", "t_2"]

    expected_values = {
        "T": ["AAPL"],
        "v": [100.0],
        "t_2": [1704067200000],
    }
    for column, values in expected_values.items():
        assert table[column] == values

def test_from_records_dedup_multiple_collisions(self):
    """A renamed column that collides again receives a further suffix."""
    record = {"a": 1, "A": 2, "A_2": 3}
    table = Table.from_records([record])

    # "a" is kept; "A" collides with it and becomes "A_2"; the literal
    # "A_2" key then collides with that rename and becomes "A_2_2".
    assert table.columns == ["a", "A_2", "A_2_2"]

    expected_values = {"a": [1], "A_2": [2], "A_2_2": [3]}
    for column, values in expected_values.items():
        assert table[column] == values

def test_len(self):
t = Table(["x"], {"x": [1, 2, 3]})
assert len(t) == 3
Expand Down Expand Up @@ -538,6 +560,28 @@ def test_check_duplicate_columns_passes_for_unique(self):
# Should not raise
s._check_duplicate_columns(tbl)

def test_store_and_query_case_insensitive_columns(self):
    """End-to-end: store a record with both T and t keys, then query via SQL."""
    # Key order matters: "T" precedes "t", so "t" is the renamed column.
    bar = {
        "T": "AAPL",
        "v": 45000000.0,
        "vw": 150.5,
        "o": 149.0,
        "c": 151.0,
        "h": 152.0,
        "l": 148.0,
        "t": 1704067200000,
        "n": 500000,
    }
    frame_store = DataFrameStore()

    stored = frame_store.store("prices", [bar])
    assert stored.row_count == 1

    # The timestamp column is addressable under its deduplicated name.
    output = frame_store.query("SELECT T, v, t_2 FROM prices")
    expected_csv = "T,v,t_2\nAAPL,45000000.0,1704067200000\n"
    assert output == expected_csv


class TestScalarSubqueryRewrite:
def test_scalar_subquery_in_select_is_rewritten(self):
Expand Down
Loading