From 55b951ef2a2aacf0d347689af088f94889195a3c Mon Sep 17 00:00:00 2001
From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com>
Date: Tue, 26 May 2026 14:12:46 -0700
Subject: [PATCH 1/7] feat: complete Arrow type mapping for parametric and
 missing types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for all common Arrow type strings returned by Hotdata's
information_schema when tables are loaded from Parquet/Arrow sources.

Simple additions to _ARROW_TYPE_MAP:
- int8 → dt.Int8 (Postgres parser wrongly returns Int64 for "int8")
- halffloat → dt.Float16 (PyArrow's str() name for float16
- large_string → dt.String (PyArrow large-offset string variant)

New _parse_parametric_arrow_type() for types with embedded parameters:
- timestamp[us] / timestamp[us, tz=UTC] → dt.Timestamp(timezone=...)
- duration[ms] → dt.Interval(unit='ms')
- decimal128(10, 3) / decimal(5, 2) → dt.Decimal(precision, scale)
- list<item: T> / large_list<item: T> → dt.Array(T) (recursive)

Adds 27 new test cases covering all new patterns including timezone
variants, all 4 time units, case-insensitivity, and recursive list
element type resolution.
EOF
)
---
 src/ibis_hotdata/types.py   | 68 ++++++++++++++++++++++++++++---
 tests/test_hotdata_types.py | 79 +++++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+), 5 deletions(-)

diff --git a/src/ibis_hotdata/types.py b/src/ibis_hotdata/types.py
index cb7920d..832eecd 100644
--- a/src/ibis_hotdata/types.py
+++ b/src/ibis_hotdata/types.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+import re
+
 import ibis.expr.datatypes as dt
 from ibis.backends.sql.datatypes import PostgresType
 
@@ -12,18 +14,23 @@
     # dates
     "date32": dt.Date,
     "date64": dt.Date,
-    # floats
+    # floats — "halffloat" is PyArrow's str() name for float16
     "float16": dt.Float16,
     "float32": dt.Float32,
     "float64": dt.Float64,
+    "halffloat": dt.Float16,
+    # signed ints — must override Postgres parser: Postgres "int8" means 8-byte (64-bit),
+    # but Arrow "int8" means 8-bit.  int16/32/64 parse correctly via Postgres.
+    "int8": dt.Int8,
     # unsigned ints
     "uint8": dt.UInt8,
     "uint16": dt.UInt16,
     "uint32": dt.UInt32,
     "uint64": dt.UInt64,
-    # strings
+    # strings — "large_string" / "largeutf8" are PyArrow large-offset variants
     "utf8": dt.String,
     "largeutf8": dt.String,
+    "large_string": dt.String,
     # binary
     "largebinary": dt.Binary,
     # time
@@ -31,6 +38,49 @@
     "time64": dt.Time,
 }
 
+# Regex patterns for Arrow parametric types whose string representation includes
+# embedded parameters (unit, timezone, precision, value type, …).
+_TIMESTAMP_RE = re.compile(r"^timestamp\[(\w+)(?:,\s*tz=(.+))?\]$", re.IGNORECASE)
+_DURATION_RE = re.compile(r"^duration\[(\w+)\]$", re.IGNORECASE)
+_DECIMAL_RE = re.compile(r"^decimal128?\((\d+),\s*(\d+)\)$", re.IGNORECASE)
+_LIST_RE = re.compile(r"^(?:large_)?list<item:\s*(.+)>$", re.IGNORECASE)
+
+# Map Arrow time-unit strings to Ibis IntervalUnit strings.
+_ARROW_UNIT_TO_IBIS: dict[str, str] = {
+    "s": "s",
+    "ms": "ms",
+    "us": "us",
+    "ns": "ns",
+}
+
+
+def _parse_parametric_arrow_type(raw: str, *, nullable: bool) -> dt.DataType | None:
+    """Try to parse an Arrow parametric type string into an Ibis DataType.
+
+    Returns ``None`` if ``raw`` does not match any known parametric pattern,
+    allowing the caller to fall through to the Postgres dialect parser.
+    """
+    m = _TIMESTAMP_RE.match(raw)
+    if m:
+        tz: str | None = m.group(2).strip() if m.group(2) else None
+        return dt.Timestamp(timezone=tz, nullable=nullable)
+
+    m = _DURATION_RE.match(raw)
+    if m:
+        unit = _ARROW_UNIT_TO_IBIS.get(m.group(1).lower(), "s")
+        return dt.Interval(unit=unit, nullable=nullable)
+
+    m = _DECIMAL_RE.match(raw)
+    if m:
+        return dt.Decimal(precision=int(m.group(1)), scale=int(m.group(2)), nullable=nullable)
+
+    m = _LIST_RE.match(raw)
+    if m:
+        value_type = dtype_from_hotdata_sql_type(m.group(1).strip(), nullable=True)
+        return dt.Array(value_type=value_type, nullable=nullable)
+
+    return None
+
 
 def dtype_from_hotdata_sql_type(sql_type: str | None, *, nullable: bool) -> dt.DataType:
     """Best-effort mapping from Hotdata `/information_schema` column `data_type` strings.
@@ -38,17 +88,25 @@ def dtype_from_hotdata_sql_type(sql_type: str | None, *, nullable: bool) -> dt.D
     Hotdata may return either SQL-style names (``INTEGER``, ``VARCHAR``, ``DOUBLE
     PRECISION``, …) or Arrow-style names (``Date32``, ``Float64``, ``Utf8``, …).
     SQL-style names are delegated to the Postgres dialect parser; Arrow-style names
-    are resolved via an explicit lookup table before falling back to the parser.
+    are resolved via an explicit lookup table or parametric pattern before falling
+    back to the parser.
     """
     if not sql_type:
         return dt.String(nullable=nullable)
 
+    raw = sql_type.strip()
+
     # Arrow-style names (case-insensitive lookup).
-    arrow_cls = _ARROW_TYPE_MAP.get(sql_type.strip().lower())
+    arrow_cls = _ARROW_TYPE_MAP.get(raw.lower())
     if arrow_cls is not None:
         return arrow_cls(nullable=nullable)
 
+    # Arrow parametric types (timestamp[us], duration[ms], decimal128(p,s), list<…>).
+    parametric = _parse_parametric_arrow_type(raw, nullable=nullable)
+    if parametric is not None:
+        return parametric
+
     try:
-        return PostgresType.from_string(sql_type.strip(), nullable=nullable)
+        return PostgresType.from_string(raw, nullable=nullable)
     except Exception:  # ibis/sqlglot raise a variety of parse errors; fall back to String
         return dt.String(nullable=nullable)
diff --git a/tests/test_hotdata_types.py b/tests/test_hotdata_types.py
index 0e743c2..5286da6 100644
--- a/tests/test_hotdata_types.py
+++ b/tests/test_hotdata_types.py
@@ -55,6 +55,15 @@ def test_dtype_from_hotdata_malformed_fallback_string():
         ("LargeBinary", True, dt.Binary),
         ("Time32", True, dt.Time),
         ("Time64", False, dt.Time),
+        # Previously missing: signed int8 (Postgres "int8" means int64, not int8)
+        ("int8", True, dt.Int8),
+        ("Int8", False, dt.Int8),
+        # Previously missing: halffloat (PyArrow's str() name for float16)
+        ("halffloat", True, dt.Float16),
+        ("HALFFLOAT", False, dt.Float16),
+        # Previously missing: large_string (PyArrow large-offset string variant)
+        ("large_string", True, dt.String),
+        ("Large_String", False, dt.String),
         # Case-insensitive
         ("date32", True, dt.Date),
         ("FLOAT64", True, dt.Float64),
@@ -65,3 +74,73 @@ def test_dtype_from_hotdata_arrow_type_names(sql_type, nullable, expected_cls):
     out = dtype_from_hotdata_sql_type(sql_type, nullable=nullable)
     assert out.nullable is nullable
     assert isinstance(out, expected_cls)
+
+
+@pytest.mark.parametrize(
+    ("sql_type", "expected_tz"),
+    [
+        ("timestamp[s]", None),
+        ("timestamp[ms]", None),
+        ("timestamp[us]", None),
+        ("timestamp[ns]", None),
+        ("timestamp[us, tz=UTC]", "UTC"),
+        ("timestamp[us, tz=America/New_York]", "America/New_York"),
+        ("TIMESTAMP[US]", None),
+    ],
+)
+def test_dtype_from_hotdata_arrow_timestamp(sql_type, expected_tz):
+    out = dtype_from_hotdata_sql_type(sql_type, nullable=True)
+    assert isinstance(out, dt.Timestamp)
+    assert out.timezone == expected_tz
+    assert out.nullable is True
+
+
+@pytest.mark.parametrize(
+    ("sql_type", "expected_unit"),
+    [
+        ("duration[s]", "s"),
+        ("duration[ms]", "ms"),
+        ("duration[us]", "us"),
+        ("duration[ns]", "ns"),
+        ("DURATION[MS]", "ms"),
+    ],
+)
+def test_dtype_from_hotdata_arrow_duration(sql_type, expected_unit):
+    out = dtype_from_hotdata_sql_type(sql_type, nullable=False)
+    assert isinstance(out, dt.Interval)
+    assert out.unit.value == expected_unit
+    assert out.nullable is False
+
+
+@pytest.mark.parametrize(
+    ("sql_type", "expected_precision", "expected_scale"),
+    [
+        ("decimal128(10, 3)", 10, 3),
+        ("decimal128(38, 0)", 38, 0),
+        ("decimal(5, 2)", 5, 2),
+        ("DECIMAL128(18, 6)", 18, 6),
+    ],
+)
+def test_dtype_from_hotdata_arrow_decimal(sql_type, expected_precision, expected_scale):
+    out = dtype_from_hotdata_sql_type(sql_type, nullable=True)
+    assert isinstance(out, dt.Decimal)
+    assert out.precision == expected_precision
+    assert out.scale == expected_scale
+    assert out.nullable is True
+
+
+@pytest.mark.parametrize(
+    ("sql_type", "expected_value_cls"),
+    [
+        ("list<item: int32>", dt.Int32),
+        ("list<item: utf8>", dt.String),
+        ("list<item: float64>", dt.Float64),
+        ("large_list<item: int64>", dt.Int64),
+        ("LIST<ITEM: UINT8>", dt.UInt8),
+    ],
+)
+def test_dtype_from_hotdata_arrow_list(sql_type, expected_value_cls):
+    out = dtype_from_hotdata_sql_type(sql_type, nullable=True)
+    assert isinstance(out, dt.Array)
+    assert isinstance(out.value_type, expected_value_cls)
+    assert out.nullable is True

From bddb9f2e372841434b081aadf3331fd3e5bc2864 Mon Sep 17 00:00:00 2001
From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com>
Date: Tue, 26 May 2026 14:18:02 -0700
Subject: [PATCH 2/7] fix: preserve timestamp scale and handle non-null list
 item types

Two bugs found by Codex review:

1. Timestamp scale was discarded: Arrow timestamp[ms] should map to
   dt.Timestamp(scale=3), not dt.Timestamp(). Add unit-to-scale mapping
   s=0, ms=3, us=6, ns=9, matching PyArrow convention.

2. Non-nullable list items mis-parsed: PyArrow emits
   'list<item: int32 not null>' for non-nullable item fields. Strip the
   ' not null' suffix and pass nullable=False to the recursive call so
   the element type is correctly typed instead of falling back to Unknown.
---
 src/ibis_hotdata/types.py   | 23 +++++++++++++++++++---
 tests/test_hotdata_types.py | 38 +++++++++++++++++++++----------------
 2 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/src/ibis_hotdata/types.py b/src/ibis_hotdata/types.py
index 832eecd..bcfee64 100644
--- a/src/ibis_hotdata/types.py
+++ b/src/ibis_hotdata/types.py
@@ -45,13 +45,23 @@
 _DECIMAL_RE = re.compile(r"^decimal128?\((\d+),\s*(\d+)\)$", re.IGNORECASE)
 _LIST_RE = re.compile(r"^(?:large_)?list<item:\s*(.+)>$", re.IGNORECASE)
 
-# Map Arrow time-unit strings to Ibis IntervalUnit strings.
+# Map Arrow time-unit strings to Ibis IntervalUnit strings and Timestamp scales.
+# Scales follow PyArrow's convention: s→0, ms→3, us→6, ns→9.
 _ARROW_UNIT_TO_IBIS: dict[str, str] = {
     "s": "s",
     "ms": "ms",
     "us": "us",
     "ns": "ns",
 }
+_ARROW_UNIT_TO_TIMESTAMP_SCALE: dict[str, int] = {
+    "s": 0,
+    "ms": 3,
+    "us": 6,
+    "ns": 9,
+}
+
+# Suffix appended by PyArrow when a list's item field is non-nullable.
+_NOT_NULL_SUFFIX_RE = re.compile(r"\s+not\s+null$", re.IGNORECASE)
 
 
 def _parse_parametric_arrow_type(raw: str, *, nullable: bool) -> dt.DataType | None:
@@ -62,8 +72,10 @@ def _parse_parametric_arrow_type(raw: str, *, nullable: bool) -> dt.DataType | N
     """
     m = _TIMESTAMP_RE.match(raw)
     if m:
+        unit = m.group(1).lower()
         tz: str | None = m.group(2).strip() if m.group(2) else None
-        return dt.Timestamp(timezone=tz, nullable=nullable)
+        scale: int | None = _ARROW_UNIT_TO_TIMESTAMP_SCALE.get(unit)
+        return dt.Timestamp(timezone=tz, scale=scale, nullable=nullable)
 
     m = _DURATION_RE.match(raw)
     if m:
@@ -76,7 +88,12 @@ def _parse_parametric_arrow_type(raw: str, *, nullable: bool) -> dt.DataType | N
 
     m = _LIST_RE.match(raw)
     if m:
-        value_type = dtype_from_hotdata_sql_type(m.group(1).strip(), nullable=True)
+        item_raw = m.group(1).strip()
+        # PyArrow appends " not null" for non-nullable item fields; strip it and
+        # pass nullable=False so the element type is marked non-nullable.
+        item_not_null = bool(_NOT_NULL_SUFFIX_RE.search(item_raw))
+        item_str = _NOT_NULL_SUFFIX_RE.sub("", item_raw).strip()
+        value_type = dtype_from_hotdata_sql_type(item_str, nullable=not item_not_null)
         return dt.Array(value_type=value_type, nullable=nullable)
 
     return None
diff --git a/tests/test_hotdata_types.py b/tests/test_hotdata_types.py
index 5286da6..699ce25 100644
--- a/tests/test_hotdata_types.py
+++ b/tests/test_hotdata_types.py
@@ -77,21 +77,22 @@ def test_dtype_from_hotdata_arrow_type_names(sql_type, nullable, expected_cls):
 
 
 @pytest.mark.parametrize(
-    ("sql_type", "expected_tz"),
+    ("sql_type", "expected_tz", "expected_scale"),
     [
-        ("timestamp[s]", None),
-        ("timestamp[ms]", None),
-        ("timestamp[us]", None),
-        ("timestamp[ns]", None),
-        ("timestamp[us, tz=UTC]", "UTC"),
-        ("timestamp[us, tz=America/New_York]", "America/New_York"),
-        ("TIMESTAMP[US]", None),
+        ("timestamp[s]", None, 0),
+        ("timestamp[ms]", None, 3),
+        ("timestamp[us]", None, 6),
+        ("timestamp[ns]", None, 9),
+        ("timestamp[us, tz=UTC]", "UTC", 6),
+        ("timestamp[us, tz=America/New_York]", "America/New_York", 6),
+        ("TIMESTAMP[MS]", None, 3),
     ],
 )
-def test_dtype_from_hotdata_arrow_timestamp(sql_type, expected_tz):
+def test_dtype_from_hotdata_arrow_timestamp(sql_type, expected_tz, expected_scale):
     out = dtype_from_hotdata_sql_type(sql_type, nullable=True)
     assert isinstance(out, dt.Timestamp)
     assert out.timezone == expected_tz
+    assert out.scale == expected_scale
     assert out.nullable is True
 
 
@@ -130,17 +131,22 @@ def test_dtype_from_hotdata_arrow_decimal(sql_type, expected_precision, expected
 
 
 @pytest.mark.parametrize(
-    ("sql_type", "expected_value_cls"),
+    ("sql_type", "expected_value_cls", "expected_item_nullable"),
     [
-        ("list<item: int32>", dt.Int32),
-        ("list<item: utf8>", dt.String),
-        ("list<item: float64>", dt.Float64),
-        ("large_list<item: int64>", dt.Int64),
-        ("LIST<ITEM: UINT8>", dt.UInt8),
+        ("list<item: int32>", dt.Int32, True),
+        ("list<item: utf8>", dt.String, True),
+        ("list<item: float64>", dt.Float64, True),
+        ("large_list<item: int64>", dt.Int64, True),
+        ("LIST<ITEM: UINT8>", dt.UInt8, True),
+        # Non-nullable item fields — PyArrow appends " not null"
+        ("list<item: int32 not null>", dt.Int32, False),
+        ("list<item: utf8 not null>", dt.String, False),
+        ("large_list<item: float32 not null>", dt.Float32, False),
     ],
 )
-def test_dtype_from_hotdata_arrow_list(sql_type, expected_value_cls):
+def test_dtype_from_hotdata_arrow_list(sql_type, expected_value_cls, expected_item_nullable):
     out = dtype_from_hotdata_sql_type(sql_type, nullable=True)
     assert isinstance(out, dt.Array)
     assert isinstance(out.value_type, expected_value_cls)
+    assert out.value_type.nullable is expected_item_nullable
     assert out.nullable is True

From 1379aa0d3eab0bc37d8d0922287f0289ed29508d Mon Sep 17 00:00:00 2001
From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com>
Date: Tue, 26 May 2026 14:20:44 -0700
Subject: [PATCH 3/7] fix: tighten decimal regex and fall through on unknown
 duration unit

Two nits from PR review:

- decimal regex 'decimal128?' made only the trailing 8 optional, so it
  matched the non-existent 'decimal12' form. Changed to
  'decimal(?:128|256)?' to accept decimal, decimal128, or decimal256.

- _ARROW_UNIT_TO_IBIS.get(..., 's') silently mapped any unrecognised
  duration unit (e.g. duration[foo]) to Interval(s). Now returns None
  so the caller falls through to the Postgres parser / String fallback.
---
 src/ibis_hotdata/types.py   |  6 ++++--
 tests/test_hotdata_types.py | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/ibis_hotdata/types.py b/src/ibis_hotdata/types.py
index bcfee64..45783a1 100644
--- a/src/ibis_hotdata/types.py
+++ b/src/ibis_hotdata/types.py
@@ -42,7 +42,7 @@
 # embedded parameters (unit, timezone, precision, value type, …).
 _TIMESTAMP_RE = re.compile(r"^timestamp\[(\w+)(?:,\s*tz=(.+))?\]$", re.IGNORECASE)
 _DURATION_RE = re.compile(r"^duration\[(\w+)\]$", re.IGNORECASE)
-_DECIMAL_RE = re.compile(r"^decimal128?\((\d+),\s*(\d+)\)$", re.IGNORECASE)
+_DECIMAL_RE = re.compile(r"^decimal(?:128|256)?\((\d+),\s*(\d+)\)$", re.IGNORECASE)
 _LIST_RE = re.compile(r"^(?:large_)?list<item:\s*(.+)>$", re.IGNORECASE)
 
 # Map Arrow time-unit strings to Ibis IntervalUnit strings and Timestamp scales.
@@ -79,7 +79,9 @@ def _parse_parametric_arrow_type(raw: str, *, nullable: bool) -> dt.DataType | N
 
     m = _DURATION_RE.match(raw)
     if m:
-        unit = _ARROW_UNIT_TO_IBIS.get(m.group(1).lower(), "s")
+        unit = _ARROW_UNIT_TO_IBIS.get(m.group(1).lower())
+        if unit is None:
+            return None  # unrecognised unit — fall through to Postgres parser / String fallback
         return dt.Interval(unit=unit, nullable=nullable)
 
     m = _DECIMAL_RE.match(raw)
diff --git a/tests/test_hotdata_types.py b/tests/test_hotdata_types.py
index 699ce25..93e9cc8 100644
--- a/tests/test_hotdata_types.py
+++ b/tests/test_hotdata_types.py
@@ -113,13 +113,22 @@ def test_dtype_from_hotdata_arrow_duration(sql_type, expected_unit):
     assert out.nullable is False
 
 
+def test_dtype_from_hotdata_arrow_duration_unknown_unit_falls_back():
+    # An unrecognised duration unit should not silently map to seconds;
+    # it falls through to the Postgres parser (which returns Unknown) or String fallback.
+    out = dtype_from_hotdata_sql_type("duration[foo]", nullable=True)
+    assert not isinstance(out, dt.Interval)  # must not produce a valid Interval
+
+
 @pytest.mark.parametrize(
     ("sql_type", "expected_precision", "expected_scale"),
     [
         ("decimal128(10, 3)", 10, 3),
         ("decimal128(38, 0)", 38, 0),
+        ("decimal256(76, 38)", 76, 38),
         ("decimal(5, 2)", 5, 2),
         ("DECIMAL128(18, 6)", 18, 6),
+        # decimal12 is NOT a valid form — should not be matched by the decimal regex
     ],
 )
 def test_dtype_from_hotdata_arrow_decimal(sql_type, expected_precision, expected_scale):
@@ -150,3 +159,10 @@ def test_dtype_from_hotdata_arrow_list(sql_type, expected_value_cls, expected_it
     assert isinstance(out.value_type, expected_value_cls)
     assert out.value_type.nullable is expected_item_nullable
     assert out.nullable is True
+
+
+def test_dtype_from_hotdata_arrow_decimal12_not_matched():
+    # "decimal12" (only the trailing 8 made optional) must NOT match the decimal regex.
+    # The Postgres parser handles bare "decimal" forms; decimal12 is not a real type.
+    out = dtype_from_hotdata_sql_type("decimal12(10, 3)", nullable=True)
+    assert not isinstance(out, dt.Decimal)  # falls through to Unknown or String

From f25b43b5befdbf86c9d9ba6cd1dc077ad47b58fe Mon Sep 17 00:00:00 2001
From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com>
Date: Tue, 26 May 2026 14:30:45 -0700
Subject: [PATCH 4/7] refactor: replace manual Ibis type construction with
 PyArrow bridge
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of constructing Ibis types directly (with hand-coded unit->scale
mappings etc.), resolve Arrow type strings to PyArrow types first and
convert via ibis.formats.pyarrow.PyArrowType.to_ibis().

- Replace _ARROW_TYPE_MAP (str -> Ibis class) with _PA_TYPE_MAP
  (str -> pa.DataType instance), covering all scalar types that can
  appear as list element types
- Merge _parse_parametric_arrow_type() into _pa_type_from_arrow_str()
  which returns a pa.DataType or None
- Remove _ARROW_UNIT_TO_IBIS and _ARROW_UNIT_TO_TIMESTAMP_SCALE —
  PyArrow encodes unit/scale semantics natively
- Fix decimal256: pa.decimal128 rejects precision > 38; fall back to
  pa.decimal256 for wider values
- dtype_from_hotdata_sql_type() simplified to: try Arrow path via
  PyArrow bridge, then Postgres parser, then String fallback
---
 src/ibis_hotdata/types.py | 177 +++++++++++++++++++++-----------------
 1 file changed, 96 insertions(+), 81 deletions(-)

diff --git a/src/ibis_hotdata/types.py b/src/ibis_hotdata/types.py
index 45783a1..d64e99c 100644
--- a/src/ibis_hotdata/types.py
+++ b/src/ibis_hotdata/types.py
@@ -4,127 +4,142 @@
 
 import re
 
+import pyarrow as pa
 import ibis.expr.datatypes as dt
 from ibis.backends.sql.datatypes import PostgresType
-
-# Arrow-style type names returned by Hotdata's information_schema when tables are
-# loaded from Parquet/Arrow sources.  PostgresType.from_string() treats these as
-# USERDEFINED unknowns, so we resolve them explicitly before falling through.
-_ARROW_TYPE_MAP: dict[str, type[dt.DataType]] = {
+from ibis.formats.pyarrow import PyArrowType
+
+# Simple Arrow type strings → PyArrow instances.  Covers non-parametric types
+# that the Postgres dialect parser does not know (Arrow-specific names, unsigned
+# ints) or mis-maps (Arrow "int8" = 8-bit; Postgres "int8" = 8-byte / int64).
+# All scalar types that can appear as list/map element types must be listed here
+# because element type strings are resolved via this map, not the Postgres parser.
+_PA_TYPE_MAP: dict[str, pa.DataType] = {
     # dates
-    "date32": dt.Date,
-    "date64": dt.Date,
+    "date32": pa.date32(),
+    "date64": pa.date64(),
     # floats — "halffloat" is PyArrow's str() name for float16
-    "float16": dt.Float16,
-    "float32": dt.Float32,
-    "float64": dt.Float64,
-    "halffloat": dt.Float16,
-    # signed ints — must override Postgres parser: Postgres "int8" means 8-byte (64-bit),
-    # but Arrow "int8" means 8-bit.  int16/32/64 parse correctly via Postgres.
-    "int8": dt.Int8,
-    # unsigned ints
-    "uint8": dt.UInt8,
-    "uint16": dt.UInt16,
-    "uint32": dt.UInt32,
-    "uint64": dt.UInt64,
-    # strings — "large_string" / "largeutf8" are PyArrow large-offset variants
-    "utf8": dt.String,
-    "largeutf8": dt.String,
-    "large_string": dt.String,
+    "float16": pa.float16(),
+    "float32": pa.float32(),
+    "float64": pa.float64(),
+    "halffloat": pa.float16(),
+    # signed ints — Arrow "int8" ≠ Postgres "int8" (8-byte/int64); all four
+    # listed here so they resolve correctly when used as list element types
+    "int8": pa.int8(),
+    "int16": pa.int16(),
+    "int32": pa.int32(),
+    "int64": pa.int64(),
+    # unsigned ints (Postgres parser returns Unknown for all of these)
+    "uint8": pa.uint8(),
+    "uint16": pa.uint16(),
+    "uint32": pa.uint32(),
+    "uint64": pa.uint64(),
+    # strings — large-offset variants not known to the Postgres parser
+    "utf8": pa.utf8(),
+    "largeutf8": pa.large_utf8(),
+    "large_string": pa.large_utf8(),
+    "string": pa.string(),
     # binary
-    "largebinary": dt.Binary,
-    # time
-    "time32": dt.Time,
-    "time64": dt.Time,
+    "binary": pa.binary(),
+    "largebinary": pa.large_binary(),
+    # boolean / null
+    "bool": pa.bool_(),
+    "boolean": pa.bool_(),
+    "null": pa.null(),
+    # time — unit is absent from these bare string forms; the unit does not
+    # affect the Ibis type (both time32 and time64 map to dt.Time)
+    "time32": pa.time32("ms"),
+    "time64": pa.time64("us"),
 }
 
-# Regex patterns for Arrow parametric types whose string representation includes
-# embedded parameters (unit, timezone, precision, value type, …).
+# Regex patterns for parametric Arrow types that embed parameters in the string.
 _TIMESTAMP_RE = re.compile(r"^timestamp\[(\w+)(?:,\s*tz=(.+))?\]$", re.IGNORECASE)
 _DURATION_RE = re.compile(r"^duration\[(\w+)\]$", re.IGNORECASE)
 _DECIMAL_RE = re.compile(r"^decimal(?:128|256)?\((\d+),\s*(\d+)\)$", re.IGNORECASE)
-_LIST_RE = re.compile(r"^(?:large_)?list<item:\s*(.+)>$", re.IGNORECASE)
-
-# Map Arrow time-unit strings to Ibis IntervalUnit strings and Timestamp scales.
-# Scales follow PyArrow's convention: s→0, ms→3, us→6, ns→9.
-_ARROW_UNIT_TO_IBIS: dict[str, str] = {
-    "s": "s",
-    "ms": "ms",
-    "us": "us",
-    "ns": "ns",
-}
-_ARROW_UNIT_TO_TIMESTAMP_SCALE: dict[str, int] = {
-    "s": 0,
-    "ms": 3,
-    "us": 6,
-    "ns": 9,
-}
-
-# Suffix appended by PyArrow when a list's item field is non-nullable.
+_LIST_RE = re.compile(r"^(large_)?list<item:\s*(.+)>$", re.IGNORECASE)
+# PyArrow appends " not null" when a list's item field is non-nullable.
 _NOT_NULL_SUFFIX_RE = re.compile(r"\s+not\s+null$", re.IGNORECASE)
 
 
-def _parse_parametric_arrow_type(raw: str, *, nullable: bool) -> dt.DataType | None:
-    """Try to parse an Arrow parametric type string into an Ibis DataType.
+def _pa_type_from_arrow_str(raw: str) -> pa.DataType | None:
+    """Best-effort: Arrow type string → PyArrow DataType, or ``None`` if not recognised.
 
-    Returns ``None`` if ``raw`` does not match any known parametric pattern,
-    allowing the caller to fall through to the Postgres dialect parser.
+    Handles simple names (via ``_PA_TYPE_MAP``) and parametric forms
+    (timestamp, duration, decimal, list/large_list).  Returns ``None`` if the
+    string is not a known Arrow type, allowing the caller to fall through to the
+    Postgres dialect parser or String fallback.
     """
-    m = _TIMESTAMP_RE.match(raw)
+    s = raw.strip()
+
+    # Simple non-parametric types.
+    pa_type = _PA_TYPE_MAP.get(s.lower())
+    if pa_type is not None:
+        return pa_type
+
+    # timestamp[unit] or timestamp[unit, tz=…]
+    m = _TIMESTAMP_RE.match(s)
     if m:
         unit = m.group(1).lower()
         tz: str | None = m.group(2).strip() if m.group(2) else None
-        scale: int | None = _ARROW_UNIT_TO_TIMESTAMP_SCALE.get(unit)
-        return dt.Timestamp(timezone=tz, scale=scale, nullable=nullable)
+        try:
+            return pa.timestamp(unit, tz=tz)
+        except Exception:
+            return None
 
-    m = _DURATION_RE.match(raw)
+    # duration[unit] — unknown units return None so the caller falls through
+    m = _DURATION_RE.match(s)
     if m:
-        unit = _ARROW_UNIT_TO_IBIS.get(m.group(1).lower())
-        if unit is None:
-            return None  # unrecognised unit — fall through to Postgres parser / String fallback
-        return dt.Interval(unit=unit, nullable=nullable)
+        try:
+            return pa.duration(m.group(1).lower())
+        except Exception:
+            return None
 
-    m = _DECIMAL_RE.match(raw)
+    # decimal / decimal128 / decimal256
+    m = _DECIMAL_RE.match(s)
     if m:
-        return dt.Decimal(precision=int(m.group(1)), scale=int(m.group(2)), nullable=nullable)
-
-    m = _LIST_RE.match(raw)
+        precision, scale = int(m.group(1)), int(m.group(2))
+        try:
+            # decimal128 supports precision 1–38; fall back to decimal256 for wider values
+            return pa.decimal128(precision, scale) if precision <= 38 else pa.decimal256(precision, scale)
+        except Exception:
+            return None
+
+    # list<item: T> or large_list<item: T> (recursive for nested types)
+    m = _LIST_RE.match(s)
     if m:
-        item_raw = m.group(1).strip()
-        # PyArrow appends " not null" for non-nullable item fields; strip it and
-        # pass nullable=False so the element type is marked non-nullable.
+        is_large = bool(m.group(1))
+        item_raw = m.group(2).strip()
         item_not_null = bool(_NOT_NULL_SUFFIX_RE.search(item_raw))
         item_str = _NOT_NULL_SUFFIX_RE.sub("", item_raw).strip()
-        value_type = dtype_from_hotdata_sql_type(item_str, nullable=not item_not_null)
-        return dt.Array(value_type=value_type, nullable=nullable)
+        item_pa_type = _pa_type_from_arrow_str(item_str)
+        if item_pa_type is None:
+            return None
+        item_field = pa.field("item", item_pa_type, nullable=not item_not_null)
+        return pa.large_list(item_field) if is_large else pa.list_(item_field)
 
     return None
 
 
 def dtype_from_hotdata_sql_type(sql_type: str | None, *, nullable: bool) -> dt.DataType:
-    """Best-effort mapping from Hotdata `/information_schema` column `data_type` strings.
+    """Best-effort mapping from Hotdata ``/information_schema`` column ``data_type`` strings.
 
     Hotdata may return either SQL-style names (``INTEGER``, ``VARCHAR``, ``DOUBLE
     PRECISION``, …) or Arrow-style names (``Date32``, ``Float64``, ``Utf8``, …).
-    SQL-style names are delegated to the Postgres dialect parser; Arrow-style names
-    are resolved via an explicit lookup table or parametric pattern before falling
-    back to the parser.
+    Arrow-style names are resolved via PyArrow's type system and converted to Ibis
+    types using the Ibis–PyArrow bridge; SQL-style names fall through to the Postgres
+    dialect parser.
     """
     if not sql_type:
         return dt.String(nullable=nullable)
 
     raw = sql_type.strip()
 
-    # Arrow-style names (case-insensitive lookup).
-    arrow_cls = _ARROW_TYPE_MAP.get(raw.lower())
-    if arrow_cls is not None:
-        return arrow_cls(nullable=nullable)
-
-    # Arrow parametric types (timestamp[us], duration[ms], decimal128(p,s), list<…>).
-    parametric = _parse_parametric_arrow_type(raw, nullable=nullable)
-    if parametric is not None:
-        return parametric
+    # Try to parse as an Arrow type string (simple or parametric).
+    pa_type = _pa_type_from_arrow_str(raw)
+    if pa_type is not None:
+        return PyArrowType.to_ibis(pa_type).copy(nullable=nullable)
 
+    # Fall through to Postgres dialect parser for SQL-style type names.
     try:
         return PostgresType.from_string(raw, nullable=nullable)
     except Exception:  # ibis/sqlglot raise a variety of parse errors; fall back to String

From 80c82ed5c711b85a377d0ad25406e4106bea8473 Mon Sep 17 00:00:00 2001
From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com>
Date: Tue, 26 May 2026 14:50:37 -0700
Subject: [PATCH 5/7] refactor: clean up code quality issues

- backend.py: replace walrus-then-overwrite pattern with plain assignments
  in _to_catalog_db_tuple
- backend.py: getattr(self, '_http', None) is not None -> hasattr(self, '_http')
- backend.py: add explanatory comments to the silent pass in
  _resolve_database_connection_id and the no-op _register_in_memory_table
- http.py: replace magic HTTP status integers with http.HTTPStatus constants
- http.py: replace range(len(columns)) index loop with direct field iteration
- types.py: cache m.group(2) to avoid calling it twice
- Remove managed.py (single-constant module); inline 'public' directly at
  the two call sites in backend.py and http.py
---
 src/ibis_hotdata/backend.py | 15 ++++++++-------
 src/ibis_hotdata/http.py    | 19 +++++++++----------
 src/ibis_hotdata/managed.py |  5 -----
 src/ibis_hotdata/types.py   |  3 ++-
 4 files changed, 19 insertions(+), 23 deletions(-)
 delete mode 100644 src/ibis_hotdata/managed.py

diff --git a/src/ibis_hotdata/backend.py b/src/ibis_hotdata/backend.py
index cccc6cf..c09b0ea 100644
--- a/src/ibis_hotdata/backend.py
+++ b/src/ibis_hotdata/backend.py
@@ -43,7 +43,6 @@
 from ibis.backends.sql import SQLBackend
 
 from ibis_hotdata.http import HotdataAPIError, HotdataClient
-from ibis_hotdata.managed import DEFAULT_SCHEMA
 from ibis_hotdata.types import dtype_from_hotdata_sql_type
 
 _INFORMATION_SCHEMA_PAGE_SIZE = 500
@@ -203,7 +202,7 @@ def do_connect(
         )
 
     def disconnect(self) -> None:
-        if getattr(self, "_http", None) is not None:
+        if hasattr(self, "_http"):
             self._http.close()
 
     # --- hierarchy ---------------------------------------------------------
@@ -253,10 +252,12 @@ def _to_catalog_db_tuple(self, table_loc: sge.Table):
         """Use the compiler SQL dialect when stringifying qualifiers (backend name is not a dialect)."""
 
         dialect = self.dialect
-        if (sg_cat := table_loc.args["catalog"]) is not None:
+        sg_cat = table_loc.args["catalog"]
+        if sg_cat is not None:
             sg_cat.args["quoted"] = False
             sg_cat = sg_cat.sql(dialect=dialect)
-        if (sg_db := table_loc.args["db"]) is not None:
+        sg_db = table_loc.args["db"]
+        if sg_db is not None:
             sg_db.args["quoted"] = False
             sg_db = sg_db.sql(dialect=dialect)
 
@@ -429,7 +430,7 @@ def _resolve_database_connection_id(self) -> str | None:
                 db = self._http.get_database(self._database_id)
                 self._database_connection_id = db.get("default_connection_id")
             except HotdataAPIError:
-                pass
+                pass  # best-effort: if the lookup fails, callers fall back to the catalog name
         return self._database_connection_id
 
     # --- schema / sql execution --------------------------------------------
@@ -575,7 +576,7 @@ def create_database(
         /,
         *,
         catalog: str | None = None,
-        schema: str = DEFAULT_SCHEMA,
+        schema: str = "public",
         tables: Sequence[str] | None = None,
         force: bool = False,
     ) -> None:
@@ -722,7 +723,7 @@ def drop_table(
             raise _ibis_err_from_hotdata(exc) from exc
 
     def _register_in_memory_table(self, _op: ops.InMemoryTable) -> None:
-        return
+        pass  # Hotdata has no local in-memory table concept; Ibis calls this hook before execute
 
     @cached_property
     def version(self) -> str:
diff --git a/src/ibis_hotdata/http.py b/src/ibis_hotdata/http.py
index 3ffec5a..a71b07b 100644
--- a/src/ibis_hotdata/http.py
+++ b/src/ibis_hotdata/http.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import http
 import io
 import json
 import time
@@ -30,8 +31,6 @@
 from hotdata.models.database_default_table_decl import DatabaseDefaultTableDecl
 from hotdata.models.load_managed_table_request import LoadManagedTableRequest
 
-from ibis_hotdata.managed import DEFAULT_SCHEMA
-
 T = TypeVar("T")
 
 # Matches Hotdata / runtimedb ``GET /v1/results/{{id}}`` Arrow responses.
@@ -197,7 +196,7 @@ def create_managed_database(
         self,
         description: str | None = None,
         *,
-        schema: str = DEFAULT_SCHEMA,
+        schema: str = "public",
         tables: Sequence[str] = (),
     ) -> dict[str, Any]:
         """POST ``/v1/databases`` — creates a managed database with an auto-provisioned default catalog."""
@@ -264,27 +263,27 @@ def _poll_result_arrow(
             status = raw.status
             ctype = (raw.headers.get("Content-Type") or "").split(";")[0].strip().lower()
 
-            if status == 200 and ctype == APPLICATION_ARROW_STREAM.lower():
+            if status == http.HTTPStatus.OK and ctype == APPLICATION_ARROW_STREAM.lower():
                 table = _ipc_stream_bytes_to_table(body)
                 return self._arrow_payload_from_table(table, result_id=result_id)
 
-            if status == 202:
+            if status == http.HTTPStatus.ACCEPTED:
                 _sleep_until(deadline, poll_interval_s)
                 continue
 
-            if status == 409:
+            if status == http.HTTPStatus.CONFLICT:
                 d = _json_utf8(body) if body else {}
                 raise HotdataAPIError(
                     d.get("error_message") or "Result failed",
-                    status_code=409,
+                    status_code=http.HTTPStatus.CONFLICT,
                     body=d,
                 )
 
-            if status == 404:
+            if status == http.HTTPStatus.NOT_FOUND:
                 d = _json_utf8(body) if body else {}
                 raise HotdataAPIError(
                     d.get("detail") or f"Result {result_id!r} not found",
-                    status_code=404,
+                    status_code=http.HTTPStatus.NOT_FOUND,
                     body=d,
                 )
 
@@ -304,7 +303,7 @@ def _arrow_payload_from_table(
     ) -> dict[str, Any]:
         sch = table.schema
         columns = sch.names
-        nullable = [sch.field(i).nullable for i in range(len(columns))]
+        nullable = [field.nullable for field in sch]
         return {
             "format": "arrow",
             "pa_table": table,
diff --git a/src/ibis_hotdata/managed.py b/src/ibis_hotdata/managed.py
deleted file mode 100644
index 972c366..0000000
--- a/src/ibis_hotdata/managed.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""Helpers for Hotdata managed databases."""
-
-from __future__ import annotations
-
-DEFAULT_SCHEMA = "public"
diff --git a/src/ibis_hotdata/types.py b/src/ibis_hotdata/types.py
index d64e99c..7ddaa49 100644
--- a/src/ibis_hotdata/types.py
+++ b/src/ibis_hotdata/types.py
@@ -80,7 +80,8 @@ def _pa_type_from_arrow_str(raw: str) -> pa.DataType | None:
     m = _TIMESTAMP_RE.match(s)
     if m:
         unit = m.group(1).lower()
-        tz: str | None = m.group(2).strip() if m.group(2) else None
+        tz_raw = m.group(2)
+        tz: str | None = tz_raw.strip() if tz_raw else None
         try:
             return pa.timestamp(unit, tz=tz)
         except Exception:

From 328185247c24a13bc8029d5d5dec2246f0cdfdf5 Mon Sep 17 00:00:00 2001
From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com>
Date: Tue, 26 May 2026 14:54:37 -0700
Subject: [PATCH 6/7] docs: update README to reflect current implementation

- Correct ibis-framework version requirement to >=10,<11
- Document all connect() optional parameters with inline comments
- Add URL query string example with optional parameters
- Document schema-only create_table (empty table from schema)
- Document force=True on drop operations
- Note to_pyarrow_batches() downloads full result then splits locally
- Add in-memory tables (unsupported) and Arrow type mapping to feature table
---
 README.md | 43 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 8b836e0..e8e1e5a 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 Use [Ibis](https://ibis-project.org/) to create on-demand databases, upload data, and query with Python expressions — get pandas or Arrow results back without writing SQL.
 
-**Requirements:** Python 3.10+, **ibis-framework** 10.x, **hotdata** ≥0.2.3.
+**Requirements:** Python 3.10+, **ibis-framework** ≥10,<11, **hotdata** ≥0.2.3.
 
 ## Install
 
@@ -60,13 +60,26 @@ con = ibis.hotdata.connect(
     api_url="https://api.hotdata.dev",
     token="YOUR_API_KEY",
     workspace_id="ws_...",
+    # optional
+    session_id=None,           # sandbox id (X-Session-Id header)
+    timeout=120.0,             # per-request HTTP timeout in seconds
+    verify_ssl=True,           # False to skip TLS verification, or path to CA bundle
+    default_connection=None,   # default catalog (connection id); auto-detected if only one exists
+    default_schema=None,       # default schema; auto-detected if only one exists
+    database_id=None,          # bind an existing managed database id at connect time
+    poll_interval_s=0.25,      # polling interval for async queries
+    poll_timeout_s=600.0,      # max time to wait for a query result
 )
 ```
 
-URL-style also works:
+URL-style also works, with the same parameters as query string keys:
 
 ```python
-con = ibis.connect("hotdata://api.hotdata.dev/?token=...&workspace_id=ws_...")
+con = ibis.connect(
+    "hotdata://api.hotdata.dev/"
+    "?token=...&workspace_id=ws_..."
+    "&default_connection=my_conn&default_schema=public"
+)
 ```
 
 ## Managed databases
@@ -86,9 +99,17 @@ con.create_table("events", events_df, database=("analytics", "public"), overwrit
 import pyarrow as pa
 table = pa.table({"id": [1, 2], "name": ["alice", "bob"]})
 con.create_table("users", table, database=("analytics", "public"), overwrite=True)
+
+# Schema-only (no data): creates an empty table with the declared schema
+import ibis.expr.schema as sch
+con.create_table(
+    "staging",
+    schema=sch.Schema({"id": "int64", "ts": "timestamp"}),
+    database=("analytics", "public"),
+)
 ```
 
-Table names must be declared when the database is created — you cannot add new table names later without recreating the database.
+Table names must be declared when the database is created — you cannot upload to a table name that was not listed in `tables=`.
 
 ### Query
 
@@ -118,9 +139,14 @@ result = con.sql(
 
 ### Delete
 
+Pass `force=True` to silently skip errors when the database or table does not exist:
+
 ```python
 con.drop_table("events", database=("analytics", "public"))
+con.drop_table("events", database=("analytics", "public"), force=True)  # no-op if missing
+
 con.drop_database("analytics")
+con.drop_database("analytics", force=True)  # no-op if missing
 ```
 
 ### Addressing summary
@@ -146,7 +172,7 @@ summary = (
 )
 ```
 
-`.execute()` returns a **pandas DataFrame**. Use `.to_pyarrow()` for an Arrow table or `.to_pyarrow_batches()` to stream batches without materializing the full result.
+`.execute()` returns a **pandas DataFrame**. `.to_pyarrow()` returns an Arrow table. `.to_pyarrow_batches()` returns a `RecordBatchReader` — note that Hotdata returns a single Arrow IPC payload per query, so this method downloads the full result first and then splits it into local batches.
 
 ### Raw SQL
 
@@ -189,17 +215,20 @@ con.list_tables(database=("my_postgres", "public"))    # tables
 | Feature | Status |
 |---------|--------|
 | `create_database` / `drop_database` (managed) | ✅ |
-| `create_table` / `drop_table` (DataFrame or Arrow upload) | ✅ |
+| `create_table` from pandas / PyArrow / schema-only | ✅ |
+| `drop_table` | ✅ |
 | `con.table(...)` with full schema metadata | ✅ |
 | Ibis expressions: filter, select, join, group\_by, agg, order\_by, limit | ✅ |
 | `con.sql(...)` raw SQL | ✅ |
 | `.execute()` → pandas, `.to_pyarrow()`, `.to_pyarrow_batches()` | ✅ |
 | `list_catalogs`, `list_databases`, `list_tables` | ✅ |
+| Arrow / Parquet column types (timestamp, decimal, list, duration, …) | ✅ |
 | Temporary tables | ❌ |
+| In-memory tables (`ibis.memtable(...)`) | ❌ |
 | Python UDFs | ❌ |
 | INSERT / UPDATE / DELETE on external connections | ❌ |
 
-SQL compilation uses Ibis's Postgres dialect. Use `con.sql(...)` as a fallback for expressions that don't compile cleanly.
+SQL compilation uses Ibis's Postgres dialect. Column types returned by Hotdata's information schema are resolved via PyArrow's type system, so Parquet-loaded tables with Arrow-native types (timestamps with time zones, decimals, lists, durations) are mapped correctly to Ibis types.
 
 ## Development
 

From c76cc61a3d1ea051ecb4ae08f15eead4f18ce538 Mon Sep 17 00:00:00 2001
From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com>
Date: Tue, 26 May 2026 15:07:52 -0700
Subject: [PATCH 7/7] fix: align default schema and query run statuses with
 runtimedb
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Change default schema public to main to match runtimedb DEFAULT_SCHEMA_NAME;
  runtimedb auto-inserts main into every managed database, so using public
  silently created a spurious empty main schema alongside the declared one
- Trim _IN_FLIGHT to {running} — runtimedb QueryRunStatus only emits
  running, succeeded, and failed; queued and pending are result statuses
- Update README examples to use main schema throughout
---
 README.md                   | 28 ++++++++++++++--------------
 src/ibis_hotdata/backend.py |  2 +-
 src/ibis_hotdata/http.py    |  3 ++-
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index e8e1e5a..bc5760a 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ con = ibis.hotdata.connect(
 )
 
 # 1. Create a database and declare the tables you'll load
-con.create_database("sales", schema="public", tables=["orders"])
+con.create_database("sales", tables=["orders"])
 
 # 2. Upload a pandas DataFrame (or PyArrow table)
 df = pd.DataFrame({
@@ -33,14 +33,14 @@ df = pd.DataFrame({
     "amount": [9.99, 49.99, 5.00],
     "region": ["west", "east", "west"],
 })
-con.create_table("orders", df, database=("sales", "public"), overwrite=True)
+con.create_table("orders", df, database=("sales", "main"), overwrite=True)
 
 # 3. Uploads are async — wait briefly before querying
 time.sleep(2)
 
 # 4. Query with Ibis expressions
 #    Managed tables are always accessed with catalog "default"
-t = con.table("orders", database=("default", "public"))
+t = con.table("orders", database=("default", "main"))
 result = (
     t.group_by("region")
     .agg(total=t.amount.sum())
@@ -49,7 +49,7 @@ result = (
 )
 
 # 5. Clean up
-con.drop_table("orders", database=("sales", "public"))
+con.drop_table("orders", database=("sales", "main"))
 con.drop_database("sales")
 ```
 
@@ -90,22 +90,22 @@ Managed databases are the primary way to bring data into Hotdata with Ibis. Decl
 
 ```python
 # Declare the database and all table names up front
-con.create_database("analytics", schema="public", tables=["events", "users"])
+con.create_database("analytics", tables=["events", "users"])
 
 # Upload from a pandas DataFrame
-con.create_table("events", events_df, database=("analytics", "public"), overwrite=True)
+con.create_table("events", events_df, database=("analytics", "main"), overwrite=True)
 
 # PyArrow tables also work
 import pyarrow as pa
 table = pa.table({"id": [1, 2], "name": ["alice", "bob"]})
-con.create_table("users", table, database=("analytics", "public"), overwrite=True)
+con.create_table("users", table, database=("analytics", "main"), overwrite=True)
 
 # Schema-only (no data): creates an empty table with the declared schema
 import ibis.expr.schema as sch
 con.create_table(
     "staging",
     schema=sch.Schema({"id": "int64", "ts": "timestamp"}),
-    database=("analytics", "public"),
+    database=("analytics", "main"),
 )
 ```
 
@@ -116,7 +116,7 @@ Table names must be declared when the database is created — you cannot upload
 When querying, use `"default"` as the catalog:
 
 ```python
-t = con.table("events", database=("default", "public"))
+t = con.table("events", database=("default", "main"))
 
 result = (
     t.filter(t.event_type == "click")
@@ -131,7 +131,7 @@ Or with raw SQL:
 ```python
 result = con.sql(
     'SELECT user_id, COUNT(*) AS n '
-    'FROM "default"."public"."events" '
+    'FROM "default"."main"."events" '
     'WHERE event_type = \'click\' '
     'GROUP BY user_id'
 ).execute()
@@ -142,8 +142,8 @@ result = con.sql(
 Pass `force=True` to silently skip errors when the database or table does not exist:
 
 ```python
-con.drop_table("events", database=("analytics", "public"))
-con.drop_table("events", database=("analytics", "public"), force=True)  # no-op if missing
+con.drop_table("events", database=("analytics", "main"))
+con.drop_table("events", database=("analytics", "main"), force=True)  # no-op if missing
 
 con.drop_database("analytics")
 con.drop_database("analytics", force=True)  # no-op if missing
@@ -161,7 +161,7 @@ con.drop_database("analytics", force=True)  # no-op if missing
 ### Ibis expressions
 
 ```python
-t = con.table("orders", database=("default", "public"))
+t = con.table("orders", database=("default", "main"))
 
 summary = (
     t.filter(t.amount > 10)
@@ -178,7 +178,7 @@ summary = (
 
 ```python
 base = con.sql(
-    'SELECT * FROM "default"."public"."orders"',
+    'SELECT * FROM "default"."main"."orders"',
     dialect="postgres",
 )
 result = base.filter(base.amount > 10).execute()
diff --git a/src/ibis_hotdata/backend.py b/src/ibis_hotdata/backend.py
index c09b0ea..cfdb1d4 100644
--- a/src/ibis_hotdata/backend.py
+++ b/src/ibis_hotdata/backend.py
@@ -576,7 +576,7 @@ def create_database(
         /,
         *,
         catalog: str | None = None,
-        schema: str = "public",
+        schema: str = "main",
         tables: Sequence[str] | None = None,
         force: bool = False,
     ) -> None:
diff --git a/src/ibis_hotdata/http.py b/src/ibis_hotdata/http.py
index a71b07b..bd51cd9 100644
--- a/src/ibis_hotdata/http.py
+++ b/src/ibis_hotdata/http.py
@@ -37,7 +37,8 @@
 APPLICATION_ARROW_STREAM = "application/vnd.apache.arrow.stream"
 
 # Statuses that mean the query run is still in progress.
-_IN_FLIGHT = {"running", "queued", "pending"}
+# runtimedb QueryRunStatus only emits "running", "succeeded", "failed".
+_IN_FLIGHT = {"running"}
 
 
 def _sleep_until(deadline: float, interval: float) -> None: