|
24 | 24 | import numpy as np |
25 | 25 | from typing import Any, Dict, List, Optional, Tuple, Union |
26 | 26 |
|
# Lookup table from a schema "binType" name to the NumPy scalar type it denotes.
# "single", "double" and "logical" are aliases for float32, float64 and bool.
_BINTYPES = dict(
    uint8=np.uint8,
    int8=np.int8,
    uint16=np.uint16,
    int16=np.int16,
    uint32=np.uint32,
    int32=np.int32,
    uint64=np.uint64,
    int64=np.int64,
    float32=np.float32,
    single=np.float32,
    float64=np.float64,
    double=np.float64,
    bool=np.bool_,
    logical=np.bool_,
)
| 43 | + |
27 | 44 |
|
28 | 45 | def jsonschema( |
29 | 46 | data: Any, schema: Any = None, **kwargs |
@@ -142,7 +159,7 @@ def _validatedata( |
142 | 159 | errors.extend(errmsg) |
143 | 160 |
|
144 | 161 | # numpy array validation |
145 | | - if isinstance(data, np.ndarray): |
| 162 | + if isinstance(data, np.ndarray) or "binType" in schema: |
146 | 163 | isvalid, errmsg = _validatebinary(data, schema, path) |
147 | 164 | if not isvalid: |
148 | 165 | valid = False |
@@ -327,92 +344,48 @@ def _validatenumeric( |
327 | 344 |
|
328 | 345 |
|
def _validatebinary(data, schema: dict, path: str) -> Tuple[bool, List[str]]:
    """Validate array data against the schema's binType, minDims and maxDims.

    Args:
        data: Value under validation. Dimension checks are applied only when
            it is an ``np.ndarray``.
        schema: Schema dict; the keys read here are ``binType``, ``minDims``
            and ``maxDims``.
        path: Location string prefixed to every error message.

    Returns:
        ``(valid, errors)`` where ``errors`` lists every violation found and
        ``valid`` is True only when that list is empty. A ``binType`` failure
        returns immediately with that single error.
    """
    if "binType" in schema:
        bintype = schema["binType"]
        # Only strings can name a type; the isinstance guard also keeps an
        # unhashable value (e.g. a list) from raising inside the dict lookup.
        dtype = _BINTYPES.get(bintype) if isinstance(bintype, str) else None
        if dtype is None:
            return False, [f'{path}: invalid binType "{bintype}"']
        if not isinstance(data, np.ndarray):
            return False, [f"{path}: expected numpy array, got {type(data).__name__}"]
        if data.dtype != dtype:
            return False, [f"{path}: expected {bintype}, got {data.dtype}"]

    # Without an array there is no shape to check.
    if not isinstance(data, np.ndarray):
        return True, []

    errors: List[str] = []
    for dimtype in ("minDims", "maxDims"):
        if dimtype not in schema:
            continue
        dims = schema[dimtype]
        dims = [int(dims)] if isinstance(dims, (int, float)) else [int(d) for d in dims]
        ismin = dimtype == "minDims"

        if len(dims) == 1:
            # A single bound constrains the length of a vector: a 1-D array,
            # or a 2-D array that has a singleton axis.
            limit = dims[0]
            is_vector = data.ndim == 1 or (data.ndim == 2 and 1 in data.shape)
            if not is_vector:
                errors.append(f"{path}: expected 1D array")
                continue
            # For a vector the element count is its length; max(shape) would
            # report 1 for an empty (0, 1) or (1, 0) array.
            length = data.size
            if (ismin and length < limit) or (not ismin and length > limit):
                errors.append(f"{path}: length {length} violates {dimtype} {limit}")
            continue

        # N-D check: axes the array does not have are treated as size 1.
        for i, bound in enumerate(dims):
            actual = data.shape[i] if i < data.ndim else 1
            if (ismin and actual < bound) or (not ismin and actual > bound):
                errors.append(
                    f"{path}: dim {i} is {actual}, violates {dimtype} {bound}"
                )

        # maxDims bounds the whole shape: any non-singleton axis beyond the
        # listed ones exceeds it.
        if not ismin and any(extent > 1 for extent in data.shape[len(dims):]):
            errors.append(
                f"{path}: has {data.ndim} dimensions, {dimtype} only specifies {len(dims)}"
            )

    return not errors, errors
418 | 391 |
|
@@ -643,30 +616,13 @@ def _generatedata(schema: dict, opts: dict) -> Any: |
643 | 616 | schematype = "array" |
644 | 617 |
|
645 | 618 | if "binType" in schema: |
646 | | - bintype = schema["binType"] |
| 619 | + dtype = _BINTYPES.get(schema["binType"], np.float64) |
647 | 620 | dims = schema.get("minDims", 1) |
648 | | - if isinstance(dims, (int, float)): |
649 | | - dims = (int(dims),) |
650 | | - elif isinstance(dims, list): |
651 | | - dims = tuple(int(d) for d in dims) |
652 | | - |
653 | | - typemap = { |
654 | | - "uint8": np.uint8, |
655 | | - "int8": np.int8, |
656 | | - "uint16": np.uint16, |
657 | | - "int16": np.int16, |
658 | | - "uint32": np.uint32, |
659 | | - "int32": np.int32, |
660 | | - "uint64": np.uint64, |
661 | | - "int64": np.int64, |
662 | | - "float32": np.float32, |
663 | | - "single": np.float32, |
664 | | - "float64": np.float64, |
665 | | - "double": np.float64, |
666 | | - "bool": np.bool_, |
667 | | - "logical": np.bool_, |
668 | | - } |
669 | | - dtype = typemap.get(bintype, np.float64) |
| 621 | + dims = ( |
| 622 | + (int(dims),) |
| 623 | + if isinstance(dims, (int, float)) |
| 624 | + else tuple(int(d) for d in dims) |
| 625 | + ) |
670 | 626 | return np.zeros(dims, dtype=dtype) |
671 | 627 |
|
672 | 628 | if schematype == "null": |
@@ -873,3 +829,16 @@ def _getsubschema(schema: dict, jsonpath: str) -> Optional[dict]: |
873 | 829 | return None |
874 | 830 |
|
875 | 831 | return subschema |
| 832 | + |
| 833 | + |
def coerce(data: Any, schema: dict) -> Any:
    """Best-effort conversion of ``data`` to the schema's ``binType`` dtype.

    Intended for use before assignment. The input is returned unchanged when
    ``schema`` is not a dict, has no ``binType``, names an unknown ``binType``,
    when ``data`` is already an array of the target dtype, or when NumPy
    cannot perform the conversion.

    Args:
        data: Value to convert.
        schema: Schema dict whose ``binType`` names the target dtype.

    Returns:
        An ``np.ndarray`` of the requested dtype, or ``data`` itself when no
        conversion applies or the conversion fails.
    """
    if not isinstance(schema, dict) or "binType" not in schema:
        return data

    bintype = schema["binType"]
    # Only strings can name a type; the isinstance guard also keeps an
    # unhashable value (e.g. a list) from raising inside the dict lookup.
    dtype = _BINTYPES.get(bintype) if isinstance(bintype, str) else None
    if dtype is None:
        return data
    if isinstance(data, np.ndarray) and data.dtype == dtype:
        return data

    try:
        return np.asarray(data, dtype=dtype)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: NumPy can raise it for Python integers that do not
        # fit the target dtype; keep the original value in that case as well.
        return data
0 commit comments