Skip to content

Commit 2d1c28f

Browse files
committed
[feat] add xarray coords support, auto-conversion in binType/minDims/maxDims
1 parent 6b01e98 commit 2d1c28f

6 files changed

Lines changed: 490 additions & 137 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
- Copyright: (C) Qianqian Fang (2019-2026) <q.fang at neu.edu>
66
- License: Apache License, Version 2.0
7-
- Version: 0.9.0
7+
- Version: 0.9.1
88
- URL: https://github.com/NeuroJSON/pyjdata
99
- Acknowledgement: This project is supported by US National Institute of Health (NIH)
1010
grant [U24-NS124027](https://reporter.nih.gov/project-details/10308329)

jdata/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@
123123
from .jschema import jsonschema
124124
from .neurojson import neuroj, neurojgui
125125

126-
__version__ = "0.9.0"
126+
__version__ = "0.9.1"
127127
__all__ = [
128128
"loadjson",
129129
"savejson",

jdata/jdict.py

Lines changed: 91 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def __getattr__(self, name):
149149
# Check for dimension-based indexing
150150
dims = _get_attr_value(attr, currentpath, "dims")
151151
if dims is not None and isinstance(dims, (list, tuple)) and name in dims:
152-
return _DimAccessor(self, name, dims.index(name))
152+
return _DimAccessor(self, name)
153153

154154
if data is None:
155155
val = None
@@ -583,36 +583,61 @@ def __str__(self):
583583

584584

585585
class _DimAccessor:
586-
"""Helper class for dimension-based indexing like jd.data.x(1:10)"""
586+
"""Helper class for dimension-based indexing like jd.data.x('label')"""
587587

588-
def __init__(self, parent, dimname, dimpos):
588+
__slots__ = ("_parent", "_dimname")
589+
590+
def __init__(self, parent, dimname):
589591
self._parent = parent
590592
self._dimname = dimname
591-
self._dimpos = dimpos
592-
593-
def __call__(self, indices):
594-
data = self._parent._data
595-
attr = self._parent._attr
596-
schema = self._parent._schema
597-
currentpath = self._parent._currentpath
598-
root = self._parent._root
599-
dims = _get_attr_value(attr, currentpath, "dims")
600593

601-
if isinstance(data, np.ndarray):
602-
nddata = len(dims) if dims else data.ndim
603-
idx = [slice(None)] * nddata
604-
idx[self._dimpos] = indices
605-
result = data[tuple(idx)]
594+
def __call__(self, sel):
595+
p = self._parent
596+
dims = _get_attr_value(p._attr, p._currentpath, "dims")
597+
data = p._data
598+
if not isinstance(data, np.ndarray) or not dims:
599+
return None
606600

607-
newobj = jdict.__new__(jdict)
608-
object.__setattr__(newobj, "_data", result)
609-
object.__setattr__(newobj, "_attr", attr)
610-
object.__setattr__(newobj, "_schema", schema)
611-
object.__setattr__(newobj, "_currentpath", currentpath)
612-
object.__setattr__(newobj, "_root", root)
613-
object.__setattr__(newobj, "_flags", {})
614-
return newobj
615-
return None
601+
# Get current position of this dim
602+
dimpos = dims.index(self._dimname)
603+
604+
# Build index tuple
605+
idx = [slice(None)] * data.ndim
606+
coords = _get_attr_value(p._attr, p._currentpath, "coords")
607+
idx[dimpos] = (
608+
_coordlookup(coords.get(self._dimname), sel, self._dimname)
609+
if coords
610+
else sel
611+
)
612+
613+
# Slice and build new jdict
614+
result = data[tuple(idx)]
615+
is_scalar = isinstance(idx[dimpos], (int, np.integer))
616+
617+
# Update dims/coords for cascade (remove dim if scalar selection)
618+
new_attr = {"$": {}}
619+
new_attr["$"]["dims"] = [
620+
d for d in dims if not (is_scalar and d == self._dimname)
621+
]
622+
if coords:
623+
new_attr["$"]["coords"] = {
624+
k: v
625+
for k, v in coords.items()
626+
if not (is_scalar and k == self._dimname)
627+
}
628+
629+
newobj = jdict.__new__(jdict)
630+
for attr, val in [
631+
("_data", result),
632+
("_attr", new_attr),
633+
("_schema", p._schema),
634+
("_currentpath", "$"),
635+
("_root", None),
636+
("_flags", {}),
637+
]:
638+
object.__setattr__(newobj, attr, val)
639+
object.__setattr__(newobj, "_root", newobj)
640+
return newobj
616641

617642

618643
def _get_attr_value(attr, path, name):
@@ -621,6 +646,46 @@ def _get_attr_value(attr, path, name):
621646
return None
622647

623648

649+
def _coordlookup(coords, sel, dimname):
650+
"""Convert coordinate labels to indices."""
651+
if coords is None:
652+
return sel
653+
654+
coords_arr = np.asarray(coords)
655+
is_numeric_coords = np.issubdtype(coords_arr.dtype, np.number)
656+
657+
# Numeric value(s) on numeric coords -> lookup
658+
if is_numeric_coords and isinstance(
659+
sel, (int, float, np.number, list, tuple, np.ndarray)
660+
):
661+
if isinstance(sel, (int, float, np.number)):
662+
idx = np.where(coords_arr == sel)[0]
663+
if len(idx) == 0:
664+
raise ValueError(f'Coord {sel} not found in "{dimname}"')
665+
return int(idx[0])
666+
elif all(isinstance(s, (int, float, np.number)) for s in sel):
667+
return [int(np.where(coords_arr == s)[0][0]) for s in sel]
668+
669+
# Int on non-numeric coords -> direct index
670+
if isinstance(sel, (int, np.integer)) and not is_numeric_coords:
671+
return sel
672+
673+
# Slice dict -> slice object
674+
if isinstance(sel, dict) and "start" in sel:
675+
coords_list = coords_arr.tolist()
676+
start = coords_list.index(sel["start"]) if sel.get("start") else 0
677+
stop = (
678+
coords_list.index(sel["stop"]) + 1 if sel.get("stop") else len(coords_list)
679+
)
680+
return slice(start, stop)
681+
682+
# String or list of strings -> index lookup
683+
coords_list = coords_arr.tolist()
684+
if isinstance(sel, (list, tuple)):
685+
return [coords_list.index(s) for s in sel]
686+
return coords_list.index(sel)
687+
688+
624689
def _esckey(key):
625690
"""Escape dots in key for JSONPath - Python compatible version."""
626691
if "." not in key:

jdata/jschema.py

Lines changed: 77 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,23 @@
2424
import numpy as np
2525
from typing import Any, Dict, List, Optional, Tuple, Union
2626

27+
_BINTYPES = {
28+
"uint8": np.uint8,
29+
"int8": np.int8,
30+
"uint16": np.uint16,
31+
"int16": np.int16,
32+
"uint32": np.uint32,
33+
"int32": np.int32,
34+
"uint64": np.uint64,
35+
"int64": np.int64,
36+
"float32": np.float32,
37+
"single": np.float32,
38+
"float64": np.float64,
39+
"double": np.float64,
40+
"bool": np.bool_,
41+
"logical": np.bool_,
42+
}
43+
2744

2845
def jsonschema(
2946
data: Any, schema: Any = None, **kwargs
@@ -142,7 +159,7 @@ def _validatedata(
142159
errors.extend(errmsg)
143160

144161
# numpy array validation
145-
if isinstance(data, np.ndarray):
162+
if isinstance(data, np.ndarray) or "binType" in schema:
146163
isvalid, errmsg = _validatebinary(data, schema, path)
147164
if not isvalid:
148165
valid = False
@@ -327,92 +344,48 @@ def _validatenumeric(
327344

328345

329346
def _validatebinary(data, schema: dict, path: str) -> Tuple[bool, List[str]]:
330-
valid = True
331-
errors = []
347+
"""Validate binary/array data against binType and dims."""
348+
valid, errors = True, []
332349

333350
if "binType" in schema:
334-
bintype = schema["binType"]
335-
typemap = {
336-
"uint8": np.uint8,
337-
"int8": np.int8,
338-
"uint16": np.uint16,
339-
"int16": np.int16,
340-
"uint32": np.uint32,
341-
"int32": np.int32,
342-
"uint64": np.uint64,
343-
"int64": np.int64,
344-
"float32": np.float32,
345-
"single": np.float32,
346-
"float64": np.float64,
347-
"double": np.float64,
348-
"bool": np.bool_,
349-
"logical": np.bool_,
350-
}
351-
if bintype not in typemap:
352-
valid = False
353-
errors.append(f'{path}: invalid binType "{bintype}"')
354-
elif data.dtype != typemap[bintype]:
355-
valid = False
356-
errors.append(f"{path}: expected {bintype}, got {data.dtype}")
357-
358-
actualsize = list(data.shape)
359-
360-
for dimtype in ["minDims", "maxDims"]:
361-
if dimtype in schema:
362-
dims = schema[dimtype]
363-
if isinstance(dims, (int, float)):
364-
dims = [int(dims)]
365-
elif isinstance(dims, (list, tuple)):
366-
dims = [int(d) for d in dims]
367-
368-
ismin = dimtype == "minDims"
369-
370-
if len(dims) == 1:
371-
# Vector check
372-
isvector = data.ndim == 1 or (data.ndim == 2 and 1 in data.shape)
373-
if not isvector and data.ndim > 1:
374-
errors.append(f"{path}: expected 1D array for {dimtype}")
375-
valid = False
376-
else:
377-
actual_len = max(data.shape) if data.ndim > 0 else 0
378-
if ismin and actual_len < dims[0]:
379-
valid = False
380-
errors.append(
381-
f"{path}: length {actual_len} < {dimtype} {dims[0]}"
382-
)
383-
elif not ismin and actual_len > dims[0]:
384-
valid = False
385-
errors.append(
386-
f"{path}: length {actual_len} > {dimtype} {dims[0]}"
387-
)
388-
else:
389-
if ismin:
390-
actualsize_ext = actualsize + [1] * max(
391-
0, len(dims) - len(actualsize)
392-
)
393-
checklen = len(dims)
394-
else:
395-
actualsize_ext = actualsize
396-
checklen = min(len(actualsize), len(dims))
397-
398-
for i in range(checklen):
399-
if ismin and actualsize_ext[i] < dims[i]:
400-
valid = False
401-
errors.append(
402-
f"{path}: dim {i} is {actualsize_ext[i]}, violates {dimtype} {dims[i]}"
403-
)
404-
elif not ismin and i < len(actualsize) and actualsize[i] > dims[i]:
405-
valid = False
406-
errors.append(
407-
f"{path}: dim {i} is {actualsize[i]}, violates {dimtype} {dims[i]}"
408-
)
409-
410-
if not ismin and len(actualsize) > len(dims):
411-
if any(s > 1 for s in actualsize[len(dims) :]):
412-
valid = False
413-
errors.append(
414-
f"{path}: has {len(actualsize)} dimensions, {dimtype} only specifies {len(dims)}"
415-
)
351+
dtype = _BINTYPES.get(schema["binType"])
352+
if dtype is None:
353+
return False, [f'{path}: invalid binType "{schema["binType"]}"']
354+
if not isinstance(data, np.ndarray):
355+
return False, [f"{path}: expected numpy array, got {type(data).__name__}"]
356+
if data.dtype != dtype:
357+
return False, [f"{path}: expected {schema['binType']}, got {data.dtype}"]
358+
359+
if not isinstance(data, np.ndarray):
360+
return valid, errors
361+
362+
# Validate minDims/maxDims
363+
for dimtype in ("minDims", "maxDims"):
364+
if dimtype not in schema:
365+
continue
366+
dims = schema[dimtype]
367+
dims = [int(dims)] if isinstance(dims, (int, float)) else [int(d) for d in dims]
368+
ismin = dimtype == "minDims"
369+
370+
if len(dims) == 1: # Vector check
371+
actual = (
372+
max(data.shape)
373+
if data.ndim <= 2 and (data.ndim == 1 or 1 in data.shape)
374+
else -1
375+
)
376+
if actual < 0:
377+
valid, errors = False, errors + [f"{path}: expected 1D array"]
378+
elif (ismin and actual < dims[0]) or (not ismin and actual > dims[0]):
379+
valid, errors = False, errors + [
380+
f"{path}: length {actual} violates {dimtype} {dims[0]}"
381+
]
382+
else: # ND check
383+
for i, d in enumerate(dims):
384+
actual = data.shape[i] if i < data.ndim else 1
385+
if (ismin and actual < d) or (not ismin and actual > d):
386+
valid, errors = False, errors + [
387+
f"{path}: dim {i} is {actual}, violates {dimtype} {d}"
388+
]
416389

417390
return valid, errors
418391

@@ -643,30 +616,13 @@ def _generatedata(schema: dict, opts: dict) -> Any:
643616
schematype = "array"
644617

645618
if "binType" in schema:
646-
bintype = schema["binType"]
619+
dtype = _BINTYPES.get(schema["binType"], np.float64)
647620
dims = schema.get("minDims", 1)
648-
if isinstance(dims, (int, float)):
649-
dims = (int(dims),)
650-
elif isinstance(dims, list):
651-
dims = tuple(int(d) for d in dims)
652-
653-
typemap = {
654-
"uint8": np.uint8,
655-
"int8": np.int8,
656-
"uint16": np.uint16,
657-
"int16": np.int16,
658-
"uint32": np.uint32,
659-
"int32": np.int32,
660-
"uint64": np.uint64,
661-
"int64": np.int64,
662-
"float32": np.float32,
663-
"single": np.float32,
664-
"float64": np.float64,
665-
"double": np.float64,
666-
"bool": np.bool_,
667-
"logical": np.bool_,
668-
}
669-
dtype = typemap.get(bintype, np.float64)
621+
dims = (
622+
(int(dims),)
623+
if isinstance(dims, (int, float))
624+
else tuple(int(d) for d in dims)
625+
)
670626
return np.zeros(dims, dtype=dtype)
671627

672628
if schematype == "null":
@@ -873,3 +829,16 @@ def _getsubschema(schema: dict, jsonpath: str) -> Optional[dict]:
873829
return None
874830

875831
return subschema
832+
833+
834+
def coerce(data: Any, schema: dict) -> Any:
835+
"""Coerce data to match schema's binType. For use before assignment."""
836+
if not isinstance(schema, dict) or "binType" not in schema:
837+
return data
838+
dtype = _BINTYPES.get(schema["binType"])
839+
if dtype is None or (isinstance(data, np.ndarray) and data.dtype == dtype):
840+
return data
841+
try:
842+
return np.asarray(data, dtype=dtype)
843+
except (ValueError, TypeError):
844+
return data

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
setup(
77
name="jdata",
88
packages=["jdata"],
9-
version="0.9.0",
9+
version="0.9.1",
1010
license="Apache license 2.0",
1111
description="JSON/binary JSON formats for exchanging Python and Numpy data",
1212
long_description=readme,

0 commit comments

Comments (0)